{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import scipy.stats\n",
    "import re\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.stem import WordNetLemmatizer\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, confusion_matrix\n",
    "import copy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Задание 1\n",
    "\n",
    "Вернемся к набору данных о видеоиграх.\n",
    "\n",
    "Ответьте на следующие вопросы:\n",
    "\n",
    "1) Как критики относятся к спортивным играм?  \n",
    "2) Критикам нравятся больше игры на PC или на PS4?  \n",
    "3) Критикам больше нравятся стрелялки или стратегии?\n",
    "\n",
    "Для каждого вопроса:\n",
    "\n",
    "- сформулируйте нулевую и альтернативную гипотезы;\n",
    "- выберите пороговый уровень статистической значимости;\n",
    "- опишите полученные результаты статистического теста."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Rank</th>\n",
       "      <th>Name</th>\n",
       "      <th>basename</th>\n",
       "      <th>Genre</th>\n",
       "      <th>ESRB_Rating</th>\n",
       "      <th>Platform</th>\n",
       "      <th>Publisher</th>\n",
       "      <th>Developer</th>\n",
       "      <th>VGChartz_Score</th>\n",
       "      <th>Critic_Score</th>\n",
       "      <th>...</th>\n",
       "      <th>NA_Sales</th>\n",
       "      <th>PAL_Sales</th>\n",
       "      <th>JP_Sales</th>\n",
       "      <th>Other_Sales</th>\n",
       "      <th>Year</th>\n",
       "      <th>Last_Update</th>\n",
       "      <th>url</th>\n",
       "      <th>status</th>\n",
       "      <th>Vgchartzscore</th>\n",
       "      <th>img_url</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Wii Sports</td>\n",
       "      <td>wii-sports</td>\n",
       "      <td>Sports</td>\n",
       "      <td>E</td>\n",
       "      <td>Wii</td>\n",
       "      <td>Nintendo</td>\n",
       "      <td>Nintendo EAD</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.7</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2006.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>http://www.vgchartz.com/game/2667/wii-sports/?...</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>/games/boxart/full_2258645AmericaFrontccc.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Super Mario Bros.</td>\n",
       "      <td>super-mario-bros</td>\n",
       "      <td>Platform</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NES</td>\n",
       "      <td>Nintendo</td>\n",
       "      <td>Nintendo EAD</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1985.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>http://www.vgchartz.com/game/6455/super-mario-...</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>/games/boxart/8972270ccc.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Mario Kart Wii</td>\n",
       "      <td>mario-kart-wii</td>\n",
       "      <td>Racing</td>\n",
       "      <td>E</td>\n",
       "      <td>Wii</td>\n",
       "      <td>Nintendo</td>\n",
       "      <td>Nintendo EAD</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.2</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2008.0</td>\n",
       "      <td>11th Apr 18</td>\n",
       "      <td>http://www.vgchartz.com/game/6968/mario-kart-w...</td>\n",
       "      <td>1</td>\n",
       "      <td>8.7</td>\n",
       "      <td>/games/boxart/full_8932480AmericaFrontccc.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>PlayerUnknown's Battlegrounds</td>\n",
       "      <td>playerunknowns-battlegrounds</td>\n",
       "      <td>Shooter</td>\n",
       "      <td>NaN</td>\n",
       "      <td>PC</td>\n",
       "      <td>PUBG Corporation</td>\n",
       "      <td>PUBG Corporation</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2017.0</td>\n",
       "      <td>13th Nov 18</td>\n",
       "      <td>http://www.vgchartz.com/game/215988/playerunkn...</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>/games/boxart/full_8052843AmericaFrontccc.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Wii Sports Resort</td>\n",
       "      <td>wii-sports-resort</td>\n",
       "      <td>Sports</td>\n",
       "      <td>E</td>\n",
       "      <td>Wii</td>\n",
       "      <td>Nintendo</td>\n",
       "      <td>Nintendo EAD</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>http://www.vgchartz.com/game/24656/wii-sports-...</td>\n",
       "      <td>1</td>\n",
       "      <td>8.8</td>\n",
       "      <td>/games/boxart/full_7295041AmericaFrontccc.jpg</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 23 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Rank                           Name                      basename  \\\n",
       "0     1                     Wii Sports                    wii-sports   \n",
       "1     2              Super Mario Bros.              super-mario-bros   \n",
       "2     3                 Mario Kart Wii                mario-kart-wii   \n",
       "3     4  PlayerUnknown's Battlegrounds  playerunknowns-battlegrounds   \n",
       "4     5              Wii Sports Resort             wii-sports-resort   \n",
       "\n",
       "      Genre ESRB_Rating Platform         Publisher         Developer  \\\n",
       "0    Sports           E      Wii          Nintendo      Nintendo EAD   \n",
       "1  Platform         NaN      NES          Nintendo      Nintendo EAD   \n",
       "2    Racing           E      Wii          Nintendo      Nintendo EAD   \n",
       "3   Shooter         NaN       PC  PUBG Corporation  PUBG Corporation   \n",
       "4    Sports           E      Wii          Nintendo      Nintendo EAD   \n",
       "\n",
       "   VGChartz_Score  Critic_Score  ...  NA_Sales  PAL_Sales  JP_Sales  \\\n",
       "0             NaN           7.7  ...       NaN        NaN       NaN   \n",
       "1             NaN          10.0  ...       NaN        NaN       NaN   \n",
       "2             NaN           8.2  ...       NaN        NaN       NaN   \n",
       "3             NaN           NaN  ...       NaN        NaN       NaN   \n",
       "4             NaN           8.0  ...       NaN        NaN       NaN   \n",
       "\n",
       "   Other_Sales    Year  Last_Update  \\\n",
       "0          NaN  2006.0          NaN   \n",
       "1          NaN  1985.0          NaN   \n",
       "2          NaN  2008.0  11th Apr 18   \n",
       "3          NaN  2017.0  13th Nov 18   \n",
       "4          NaN  2009.0          NaN   \n",
       "\n",
       "                                                 url  status Vgchartzscore  \\\n",
       "0  http://www.vgchartz.com/game/2667/wii-sports/?...       1           NaN   \n",
       "1  http://www.vgchartz.com/game/6455/super-mario-...       1           NaN   \n",
       "2  http://www.vgchartz.com/game/6968/mario-kart-w...       1           8.7   \n",
       "3  http://www.vgchartz.com/game/215988/playerunkn...       1           NaN   \n",
       "4  http://www.vgchartz.com/game/24656/wii-sports-...       1           8.8   \n",
       "\n",
       "                                         img_url  \n",
       "0  /games/boxart/full_2258645AmericaFrontccc.jpg  \n",
       "1                   /games/boxart/8972270ccc.jpg  \n",
       "2  /games/boxart/full_8932480AmericaFrontccc.jpg  \n",
       "3  /games/boxart/full_8052843AmericaFrontccc.jpg  \n",
       "4  /games/boxart/full_7295041AmericaFrontccc.jpg  \n",
       "\n",
       "[5 rows x 23 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the video game sales dataset and preview its first five rows.\n",
    "df_videogames = pd.read_csv('video_games_sales.csv')\n",
    "df_videogames.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1) Как критики относятся к спортивным играм?\n",
    "\n",
    "Проверим, является ли распределение значений отзывов критиков спортивных игр нормальным."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEICAYAAABF82P+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3de3xcdZn48c+T+7W595ZL0yttKW0psQXKVYRtESjquoIiKrIsK7ir/nTFy7quuutlXV/rhbVURVFRdBGkYLFVQBCwpSn0fqFpmjZpmiZN2qRJc515fn/MmTpNJ82kzcmZy/N+vfKamXO+35kn02ae+V7O9yuqijHGGDNYktcBGGOMiU6WIIwxxoRlCcIYY0xYliCMMcaEZQnCGGNMWJYgjDHGhOVqghCRZSKyR0RqROSBMOdXiMhWEdksItUickXIuToR2RY852acxhhjziRuXQchIsnAm8D1QAOwEbhdVXeGlMkBulRVRWQ+8GtVne2cqwOqVPVopK9ZXFyslZWVo/dLGGNMnNu0adNRVS0Jdy7FxdddDNSoai2AiDwGrABOJQhV7Qwpnw2cV7aqrKykutoaG8YYEykROTDUOTe7mEqB+pDHDc6x04jIO0RkN/A74K6QUwqsE5FNInKPi3EaY4wJw80EIWGOndFCUNUnnW6lW4Evh5xaqqqLgOXAfSJyVdgXEbnHGb+obmlpGY24jTHG4G6CaADKQx6XAY1DFVbVl4DpIlLsPG50bpuBJwl0WYWrt0pVq1S1qqQkbDeaMcaYc+BmgtgIzBSRqSKSBtwGrA4tICIzRESc+4uANKBVRLJFJNc5ng3cAGx3MVZjjDGDuDZIraoDInI/sBZIBh5W1R0icq9zfiXwLuBOEekHuoH3ODOaJgBPOrkjBfiFqv7erViNMcacybVprl6oqqpSm8VkjDGRE5FNqloV7pxdSW2MMSYsSxDGmCG1dvby8Mv7ebUm4utVTRxx80I5Y0wMa+3s5ebvvkxjew8AX7l1HndcOsXjqMxYshaEMSasrz67m6Odffzi75dw1awSvvK7nTR39HgdlhlDliCMMWdoPN7Nb984xHuXVHD59GK+dMuF9PuUh1+p8zo0M4YsQRhjzvDohgP4VfnwFVMBqCzO5ppZJfz2jUP4/fEz89GcnSUIY8xpVJXfbT3M0hnFlBdmnTp+68WlNHX0sL621cPozFiyBGGMOc2uwyeoaz3JjRdNOu349XMnkJGaxLqdRzyKzIw1SxDGmNP8YecRROCGuRNOO56RmsziqUW8bFNeE4YlCGPMaV7Zd5R5k/Moykk/49yVM4qpae7kcHu3B5GZsWYJwhhzSnefjzcOHuPy6UVhz18xsxiAV2tsHCIRWIIwxpxSfaCNfp9y+YzisOcvmJBLbnoKb9QfG+PIjBcsQRhjTnmlppWUJOEtlQVhzyclCfPL89hcf3yMIzNesARhjDll04E2LirLIytt6FV4Fpbns/vwCbr7fGMYmfGCJQhjDAA+v7L9UAcLyvLPWm5heQEDfmV7Y/sYRWa8YgnCGANATXMn3f0+5pflnbXcwvJAAtl80LqZ4p0lCGMMAFsbAh/484dpQZTkpjM5L4Nth6wFEe8sQRhjANja0E5OegrTirOHLTtn0jj2NJ0Yg6iMlyxBGGMA2HqonXml40hKkmHLzp6Uy76WTnoHbKA6nlmCMMbQN+BnV2PHsN1LQbMnjmPAr+xr7nI5MuMlSxDGGN48coI+n3/YAeqgOZNyAdjd1OFmWMZjriYIEVkmIntEpEZEHghzfoWIbBWRzSJSLSJXRFrXGDN6tgQHqEsja0FUFmWTlpLEbhuHiGuuJQgRSQYeBJYDc4HbRWTuoGLPAQtUdSFwF/DDEdQ1xoySbQ3t5GelUl6YGVH5lOQkZk3IYddha0HEMzdbEIuBGlWtVdU+4DFgRWgBVe1U1eD2VNmARlrXGDN6
tjS0c1FpHiLDD1AHzZqQy94jnS5GZbzmZoIoBepDHjc4x04jIu8Qkd3A7wi0IiKu69S/x+meqm5paRmVwI1JJD39Pt48cmLYK6gHm16SQ1NHD529Ay5FZrzmZoII91XkjM1sVfVJVZ0N3Ap8eSR1nfqrVLVKVatKSkrOOVhjEtWOxg58fuWiCAeog6aX5ABQ22KtiHjlZoJoAMpDHpcBjUMVVtWXgOkiUjzSusaYc7fNGaAeaQtixvjABXX7LEHELTcTxEZgpohMFZE04DZgdWgBEZkhTqeniCwC0oDWSOoaY0bH1oZ2SnLTmTDuzB3kzmZKUTYpSUJNsyWIeDX0mr7nSVUHROR+YC2QDDysqjtE5F7n/ErgXcCdItIPdAPvcQatw9Z1K1ZjEtnWQ+0sKBvZADVAanISFUVZdrFcHHMtQQCo6hpgzaBjK0Pufx34eqR1jTGjq7N3gH0tndw8f/I51Z9ekmNdTHHMrqQ2JoFtP9SOKswvH9kAddD0khzqWrsY8PlHOTITDSxBGJPATi3xXXpuCWLG+Bz6fcrBtpOjGZaJEpYgjElgWxraKc3PpChnZAPUQdNLgjOZbBwiHlmCMCaBbW04fmqHuHMx1dk74kCrJYh4ZAnCmATV2tlLfVs3C85x/AEgPyuNvMxU6ixBxCVLEMYkqK0NgS1DR3qB3GCVRVkcaLUxiHhkCcKYBLW5/jhJAvPOcYA6aEpRNvuPWgsiHlmCMCZBbWk4zqwJuWSnn9/lUJXF2TQe77btR+OQJQhjEpCqsqX++Hl3L0Ggi8mv0HCsexQiM9HEEoQxCai+rZtjJ/tZcB4zmIKmFNlMpnhlCcKYBLQ5uILrecxgCqosygKg7qgNVMcbSxDGJKBNdW1kpSUza0LueT9XYXYauekp1oKIQ5YgjElAG/a3ccmUAlKTz/8jQESYUpxFnU11jTuWIIxJMMdP9rHnyAkWVxaO2nNOKcq2FkQcsgRhTILZWHcMVVg8dfQSRGVRFg3Huum3VV3jiiUIYxLMhtpW0lKSRmUGU9CUomwG/ErjcZvqGk8sQRiTYF6ra2NheT4Zqcmj9pyVzlRXu6I6vliCMCaBtJ/sZ/uhdi4dxe4l+OtUV1uTKb5YgjAmgby0twW/wtUXjB/V5y3JTScjNck2DoozliCMSSAv7G4mPyv1vPaACEdEKC/IsgQRZ1xNECKyTET2iEiNiDwQ5vz7RGSr8/OqiCwIOVcnIttEZLOIVLsZpzGJwO9X/vRmC1fPKiE5SUb9+SsKs6i3BBFXzm8Zx7MQkWTgQeB6oAHYKCKrVXVnSLH9wNWqekxElgOrgCUh569V1aNuxWhMInmj/jhtXX1cO8rdS0HlhVmsr21FVREZ/QRkxp6bLYjFQI2q1qpqH/AYsCK0gKq+qqrHnIfrgTIX4zEmoT29pZG0lCSum+Negujq83HsZL8rz2/GnpsJohSoD3nc4BwbyoeBZ0MeK7BORDaJyD1DVRKRe0SkWkSqW1pazitgY+LVgM/PM1sPc93s8eRmpLryGhWFgZlMNg4RP9xMEOHamBq2oMi1BBLEp0MOL1XVRcBy4D4RuSpcXVVdpapVqlpVUlJyvjEbE5fW17ZxtLOXWxZMdu01LEHEHzcTRANQHvK4DGgcXEhE5gM/BFaoamvwuKo2OrfNwJMEuqyMMefgF68dID8rlWtnu9O9BFBWkAlgA9VxxM0EsRGYKSJTRSQNuA1YHVpARCqAJ4D3q+qbIcezRSQ3eB+4AdjuYqzGxK3G492s3XGE97ylfFSvnh4sOz2F4pw0SxBxxLVZTKo6ICL3A2uBZOBhVd0hIvc651cCXwCKgP91Zj0MqGoVMAF40jmWAvxCVX/vVqzGxLOfrT+AqnLHkimuv1aZXQsRV1xLEACqugZYM+jYypD7dwN3h6lXCywYfNwYMzJtXX389NU6ls+bRLkzRuCmisIs3qg/NnxBExPsSmpj4thDL+2ju9/Hx6+fOSavV1GYRePx
HgZs2e+4YAnCmDi1/2gXP36ljlsXljJj/PlvLRqJ8sJMfH7lcHvPmLyecZclCGPikKryhae2k56cxAPLZ4/Z65bbVNe4YgnCmDj09NbD/HnvUT617ALGj8sYs9e1ayHiiyUIY+JMe3c/X3p6J/PL8njfGMxcCjUpL5OUJLGprnHC1VlMxpix9821e2jr6uUnH3qLK6u2nk1yklBakGktiDhhLQhj4si2hnZ+vuEAd15WybzSPE9iKC/Iov6Y7U0dDyxBGBMn/H7lX5/aTlF2Op+4YZZncZTbvhBxwxKEMXHi8U0NbK4/zmeWz2acSyu2RqKiMIu2rj46ewc8i8GMDksQxsSBnn4f31y3h0UV+bxz0dlW1XdfeaEt2hcvLEEYEwd+vv4AzSd6+Zdlsz3fzc2musYPSxDGxLiu3gG+/6d9LJ1RxKXTirwO51SCsBZE7LMEYUyM+9n6A7R29fGJ6y/wOhQA8jJTyU1PsQQRByxBGBPD+n1+fvJKHUtnFHHJlAKvwwFARCgvtGW/44ElCGNi2LPbm2jq6OHDV0z1OpTTlBdm2rUQccAShDExSlX50cv7mVaczTWz3NtK9FxUONdC+P1ht6E3McIShDEx6vWDx9lSf5wPLa0kaYyX1BhORWEWvQN+Wjp7vQ7FnAdLEMbEqF++dpCc9BTeuajM61DOUGYzmeKCJQhjYlBn7wC/23qYmxdMIjs9+tbctGsh4oMlCGNi0DNbGunu9/HuqnKvQwmrND94NbUNVMcyVxOEiCwTkT0iUiMiD4Q5/z4R2er8vCoiCyKta0wi+1V1PTPG53Bxeb7XoYSVkZrMxHEZ1oKIca4lCBFJBh4ElgNzgdtFZO6gYvuBq1V1PvBlYNUI6hqTkPYeOcEbB4/znqpyz5fVOJvAVFdLELHMzRbEYqBGVWtVtQ94DFgRWkBVX1XVY87D9UBZpHWNSVSPv95ASpJw68XeLso3HFv2O/a5mSBKgfqQxw3OsaF8GHj2HOsakxD8fuXpzY1cObOYktx0r8M5q4rCLJo6eugd8HkdijlHbiaIcG3fsFfNiMi1BBLEp8+h7j0iUi0i1S0tLecUqDGxYtPBYzS297BiYfR/XyovyEIVDtkV1THLzQTRAIROsSgDGgcXEpH5wA+BFaraOpK6AKq6SlWrVLWqpKRkVAI3Jlo9tfkQGalJXD93gtehDKuiyKa6xrqIEoSI/EZE3i4iI0koG4GZIjJVRNKA24DVg563AngCeL+qvjmSusYkmn6fnzXbmrhuzoSovPZhsPIC52I5a0HErEg/8L8PvBfYKyJfE5HZw1VQ1QHgfmAtsAv4taruEJF7ReRep9gXgCLgf0Vks4hUn63uSH4xY+LNKzVHaevq45YFk70OJSLjc9NJS0mygeoYFtHXEFX9I/BHEckDbgf+ICL1wA+An6tq/xD11gBrBh1bGXL/buDuSOsak8hWb2kkNyOFay6Ija7UpCShrCDTEkQMi7jLSESKgA8S+EB/A/g2sAj4gyuRGWNO6en3sW7HEZbPm0h6SrLX4USswvaFiGkRtSBE5AlgNvAz4GZVPeyc+lWwW8gY454/7Wmhs3eAm2OkeymovCCLTQeODV/QRKVIR7p+6HT5nCIi6araq6pVLsRljAmxdkcT+VmpXBYFe06PREVhFid6Bmg/2U9eVqrX4ZgRirSL6Sthjv1lNAMxxoTXN+Dnj7uOcP2cCaQkx9b6muW2qmtMO2sLQkQmEriCOVNELuavF7CNA7Jcjs0YA7y67ygnegZYNm+i16GMWHmhs6rrsZNcVJbncTRmpIbrYvobAgPTZcC3Qo6fAD7rUkzGmBBrdzSRnZbM0hnFXocyYtaCiG1nTRCq+gjwiIi8S1V/M0YxGWMcPr+ybscRrp09nozU2Jm9FDQuI5X8rFSb6hqjhutiukNVfw5UisgnBp9X1W+FqWaMGSXVdW20dvWxfN4kr0M5ZzbVNXYN18WU7dzmuB2IMeZMv9/RRFpK
UsxcHBdOeUEWOxrbvQ7DnIPhupgecm7/fWzCMcYEqSprtzdx1cySmFh7aSjlhVms29mEz68kJ0XvBkfmTJEu1vcNERknIqki8pyIHBWRO9wOzphEtvNwB43tPdwQAyu3nk1FYRb9PqWpo8frUMwIRTqp+gZV7QBuIrAU9yzgU65FZYzhhd3NAFwzO3a7lyBkqquNQ8ScSBNE8BLIG4FfqmqbS/EYYxzP725mflke43MzvA7lvFTYVNeYFWmCeFpEdgNVwHMiUgJYe9EYl7R19fFG/XGuuWC816Gct8n5mSQJNFiCiDkRJQhVfQC4DKhylvbuAla4GZgxiezFN5tRhbfOjv0EkZqcxKS8TGtBxKCRTI2YQ+B6iNA6Px3leIwxwPO7WyjOSWN+aXwsT1FeaAkiFkW63PfPgOnAZsDnHFYsQRgz6gZ8fl7c08z1cyeSFCfTQisKs3hhT4vXYZgRirQFUQXMVVV1MxhjDLx+8DgdPQNx0b0UVF6QRcuJXrr7fGSmxd6SIYkq0kHq7UDsLSVpTAz6894WkgSunBV7i/MNpaIoMJOp4Zh1M8WSSFsQxcBOEXkN6A0eVNVbXInKmAS2obaNeaV5jMuInw12Qld1nTkh1+NoTKQiTRBfPJcnF5FlBPauTiawK93XBp2fDfyYwN7Wn1PVb4acqyOwrLgPGLCd60wi6On3sbn+OB9cWul1KKOqvCCQIOxiudgSUYJQ1RdFZAowU1X/KCJZBD70hyQiycCDwPUErr7eKCKrVXVnSLE24J+AW4d4mmtV9WgkMRoTD944eJw+n58lUwu9DmVUFeekkZmazMG2bq9DMSMQ6VpMfw88DjzkHCoFfjtMtcVAjarWqmof8BiDrp1Q1WZV3Qj0jyhqY+LUhv2tiEBVZXwlCBGhvDCTehuDiCmRDlLfBywFOgBUdS8w3BSLUqA+5HGDcyxSCqwTkU0ics8I6hkTszbUtjF30jjyMuNn/CGoojDLuphiTKQJotdpBQDgXCw33JTXcBO4RzJNdqmqLgKWA/eJyFVhX0TkHhGpFpHqlhabZ21iV++Aj9cPHmPJ1CKvQ3FFWUFg4yCbLR87Ik0QL4rIZ4FMEbke+D/g6WHqNADlIY/LgMZIA1PVRue2GXiSQJdVuHKrVLVKVatKSmJ71UuT2LbUt9M74GfJtPjqXgqqKMziZJ+Ptq6+4QubqBBpgngAaAG2Af8ArAE+P0ydjcBMEZkqImnAbcDqSF5MRLJFJDd4H7iBwLUYxsStDbWtACyOs/GHoHJb1TXmRDqLyS8ivwV+q6oR9eOo6oCI3A+sJTDj6WFV3SEi9zrnV4rIRKAaGAf4ReRjwFwC1108KSLBGH+hqr8f4e9mTEzZsL+N2RNzKchO8zoUVwSX/a4/1s3FFQUeR2MicdYEIYFP6H8D7icwpiAi4gO+q6pfGu7JVXUNgdZG6LGVIfebCHQ9DdYBLBg2emPiRL/Pz6YDx/i7qnB/DvHBNg6KPcN1MX2MwOylt6hqkaoWAkuApSLycdejMyZBbG1op7vfx6XT4nOAGiArLYXinDRLEDFkuARxJ3C7qu4PHlDVWuAO55wxZhRs2O+MP8TZBXKDlRdm2RhEDBkuQaSGu5LZGYeIv4naxnhkQ20bM8fnUJST7nUoriovsAQRS4ZLEGebj2Zz1YwZBQM+P9V1bXE7vTVURWEWh9t76Pf5vQ7FRGC4WUwLRKQjzHEBYnsndWOixPbGDrr6fHF7gVyo8sJMfH7l8PGeU0uAm+h11gShqrazhzEuC17/kAgtiPJTU11PWoKIAZFeKGeMccmG/W1MK85mfG78N8qDy37bOERssARhjId8fmXj/sQYfwCYlJdBSpJwoNUSRCywBGGMh3Yd7uBE70BCjD8ApCQnUV6YxYHWLq9DMRGwBGGMh9Yn0PhDUGVRFnXWgogJliCM8dCG/W1MKcpiUl6m16GMmcribA60dtmy3zHAEoQxHvH7ldf2t8Xd9qLDmVqc
zck+H80ner0OxQzDEoQxHtnddIL27v6EGX8IqizKBmD/URuHiHaWIIzxSHD9pUQaf4C/JggbqI5+liCM8ciG2jZK8zMpK0isC8Ym52eQmizsP2oD1dHOEoQxHlBVXkuQ9ZcGC051rbMupqhnCcIYD+xt7qStq49LE2z8IWhqUTZ11sUU9SxBGOOBRFp/KZwpRdkcaD1pU12jnCUIYzywvraNieMyTu3TnGimFmfR3e/jSIdNdY1mliCMGWOqyob9rVw6rZDAtu+Jp7I4MJPJupmimyUIY8bYvpZOjnb2sSSO958eTnCqqw1URzdXE4SILBORPSJSIyIPhDk/W0T+IiK9IvLJkdQ1Jla9ui8w/nD59MRNEJPzM0lLTmK/tSCimmsJQkSSgQeB5cBc4HYRmTuoWBvwT8A3z6GuMTHp1ZpWSvMzE3b8ASA5SSgvzLQWRJRzswWxGKhR1VpV7QMeA1aEFlDVZlXdCPSPtK4xscjvV9bvb+Wy6UUJO/4QNLU42/aFiHJuJohSoD7kcYNzbFTrisg9IlItItUtLS3nFKgxY2VXUwfHT/YndPdS0NTibPYf7cLvt6mu0crNBBHu61Gk/xMirquqq1S1SlWrSkpKIg7OGC/8xRl/uMwSBNNLcugd8HPoeLfXoZghuJkgGoDykMdlQOMY1DUmar26r5VpxdkJtf/DUKaPzwGgprnT40jMUNxMEBuBmSIyVUTSgNuA1WNQ15io1DvgY31tK5fPsNYDwIwSSxDRLsWtJ1bVARG5H1gLJAMPq+oOEbnXOb9SRCYC1cA4wC8iHwPmqmpHuLpuxWrMWNhQ28bJPh9vnT3e61CiQkF2GkXZaZYgophrCQJAVdcAawYdWxlyv4lA91FEdY2JZc/vbiY9JYnLphV7HUrUmD4+h5oWSxDRyq6kNmYMqCrP725m6YxiMtOSvQ4naswYn0NNc6ct2helLEEYMwb2tXRxsO2kdS8NMqMkh/bufo529nkdignDEoQxY2DdziYASxCDzLCZTFHNEoQxY2D15kYWVeQzOd+mt4Y6lSBsHCIqWYIwxmV7mk6wu+kEKxZGupBA4piUl0F2WjL7rAURlSxBGOOypzYfIkngxosmeR1K1BGRwEwmSxBRyRKEMS7qHfDx6+p6rrlgPCW56V6HE5VmlFiCiFaWIIxx0Zpthzna2ccHLq/0OpSoNX18Dk0dPZzoGbyos/GaJQhjXKKqPPxyHdNKsrlyhl0cN5QLJuQCgbEaE10sQRjjknU7j7DtUDv3XjWdpKTE3vvhbOZMHgfArsMdHkdiBrMEYYwLevp9fO3Z3Uwryeadi2z20tlMzstgXEYKOw9bCyLauLoWkzGJ6hu/38P+o108evcSUpLte9jZiAhzJo2zFkQUsv+5xoyyX752kIdf2c8HLpvCUht7iMicSePY03QCn+0uF1UsQRgzSvp9fr61bg+feWIbV80q4fM3zfU6pJgxd9I4uvt9HGyzPaqjiXUxGXOefH7lma2NfPf5GmqaO3nXojL+853zSLWupYjNmfTXgeqpxdkeR2OCLEEYc456B3z8ZtMhHnppHwdaTzJrQg6r3n8JN1w40evQYs7MCTkkSSBB2BXn0cMShDHn4KU3W/jXp7ZzoPUk88vyWHnHIm6YO9Gms56jjNRkppXk2EB1lLEEYcwIDPj8fGPtHla9VMu04mweuWsxV80sRsQSw/maM2kcm+ravA7DhLAEYUyEegd83Pfo6/xxVzPvv3QKn79pDukptjvcaJlfmsfTWxppOdFr61ZFCRtFMyYCvQM+PvLzQHL491su5Mu3zrPkMMoWlOcDsLXhuMeRmCBXE4SILBORPSJSIyIPhDkvIvId5/xWEVkUcq5ORLaJyGYRqXYzTmPOxu9X/t+vt/Dc7ma+cus8W3jPJfNKx5EksKXeEkS0cK2LSUSSgQeB64EGYKOIrFbVnSHFlgMznZ8lwPed26BrVfWoWzEaE4n/eW4vz2w9zKeXzeaO
S6d4HU7cykpLYdaEXN6wBBE13GxBLAZqVLVWVfuAx4AVg8qsAH6qAeuBfBGxOW4majy1+RDfeW4v776kjHuvnuZ1OHHv4op8ttQfR9WuqI4GbiaIUqA+5HGDcyzSMgqsE5FNInKPa1EaM4TXDx7jU49vZfHUQv7jHRfZTKUxsKAsn46eAepa7YrqaOBmggj31zT4a8HZyixV1UUEuqHuE5Grwr6IyD0iUi0i1S0tLecerTEhGo6d5J6fVjMpL4OH7riEtBSbzzEWggPVm+uPeRyJAXcTRANQHvK4DGiMtIyqBm+bgScJdFmdQVVXqWqVqlaVlJSMUugmkXX1DnD3I9X0Dvj50QfeQkF2mtchJYxZE3LJSU9hY50liGjgZoLYCMwUkakikgbcBqweVGY1cKczm+lSoF1VD4tItojkAohINnADsN3FWI0BAjOWPv6rzbx55AQPvncRM8bneB1SQklOEt5SWcCG2lavQzG4OItJVQdE5H5gLZAMPKyqO0TkXuf8SmANcCNQA5wEPuRUnwA86fT5pgC/UNXfuxWrMUFf//1u1u08whdvnstVs6xF6oUl04p4YU8LzSd6GJ+b4XU4Cc3VK6lVdQ2BJBB6bGXIfQXuC1OvFljgZmzGDPbQi/t46KVa7rxsil3r4KElUwsBeG1/GzfNn+xxNInNRt6MAX618SBffXY3Ny+YzBdvvtBmLHloXmke2WnJbKi1dZm8ZgnCJLzHNzXwmSe2cfWsEv773QtsRVaPpSYncUllIettHMJzliBMQnt0wwE++X9bWDqjmJU2nTVqXD69iL3NnRxu7/Y6lIRmfw0mYf3wz7V87sntXDd7PD+4s4rMNFt8L1pcN3s8AM/vbvY4ksRmCcIkHJ9f+dLTO/nK73axfN5Evn/HJWSkWnKIJjPG51BWkMkLliA8ZQnCJJTuPh8feXQTD7+ynw9eXsn33rvIupWikIhw3ezxvFxzlJ5+n9fhJCz7yzAJo7mjh9t/sJ51O4/whZvm8sVbLiTZBqSj1lvnTKCn389f9tlgtVcsQZiEsLGujbd/92X2NJ3g+++7hLuumOp1SGYYS6YWkpOewrPbD3sdSsKyBGHims+vrHxxH7evWk9Oegq/vW8py+ZN9DosE9w6Xq4AAA4JSURBVIGM1GRuuHACz25vsm4mj1iCMHFrZ2MHf7vyVb727G7eNmcCT92/lAsm5nodlhmBWxeWcqJngD/tscFqL7i61IYxo6HlRC8H27o42ecjMzWZguw0xuemk5uRekZZn195bX8bj244wO+2HSYvM5Vv37aQWxZMtqujY9Dl04sozknnN68fYtk820tsrFmCMFFp1+EOfrWxnrU7mjjc3hO2THZaMhPyMpg4LoOc9BSOn+xn5+EOOnsHyE1P4R+ums4/Xj2dvKwzE4mJDSnJSfztJWWsemkfh453U5qf6XVICcUShIkqOxs7+K+1u3lhTwvpKUlcNauEu6+cxvSSbLLTU+ju89HW1ceRjh6aOno40tHD4fYejnb2kp+ZxoqFk1kyrYjr50ywC9/ixB2XVrDqpX08uv4A/7JsttfhJBRLECYqtHf38611e/jZ+gPkZabyyRtm8f5LK+3bv6GsIIu3zZnAL187yEeunUFOun1sjRUbpDae8vuVX1fX89Zv/omfrT/A+5ZM4YVPXsP9b51pycGc8pFrZ3DsZD8/eWW/16EkFEvFxjO7mzr4/JPbqT5wjEUV+Txy12LmleZ5HZaJQgvL83nbnPGseqmW9y2ZYtvAjhFrQZgxd/xkH195Zidv/87L7Gvp5Bvvms/j915uycGc1af+ZjYn+3x89dldXoeSMKwFYcZMV+8AP35lPw+9VEtn7wB/d0k5Dyyfbd8GTUQumJjL3181je//aR9vnz+Zq21LWNdZgjCuq287yc/XH+CxjfW0d/fztjkT+OTfzGL2xHFeh2ZizD9fN5PndzXzz4+9wdP3X0F5YZbXIcU1SxDGFcdP9rF2RxPP
bD3MKzVHERGWXTiRD185lUUVBV6HZ2JURmoyD73/Em7+3svc8aMNPHbPpUzKs2sj3CKq6t6TiywDvg0kAz9U1a8NOi/O+RuBk8AHVfX1SOqGU1VVpdXV1aP7S5iInOjpZ9uhdv6yr5VX97Wypf44A36lojCLFQsnc/viCibbRU5mlLx+8Bgf+NFr5GSk8L33XswlUwq9DilmicgmVa0Ke86tBCEiycCbwPVAA7ARuF1Vd4aUuRH4KIEEsQT4tqouiaRuOJYg3OX3K0dO9FDf1k1920kOtp3kzSMn2Hm4gwOtJwFIEphfls/l04tYNm8iF5Xm2RIXxhXbD7Xzj49uouFYN+++pIz7rp3BlKJsr8OKOWdLEG52MS0GalS11gniMWAFEPohvwL4qQay1HoRyReRSUBlBHXNKOgd8NHV66Ord4DO3gHau/tpOdFL84le57aHlhO9HDrWTcOxbvp8/lN1RaCyKJt5k/P4u6py5k4axyWVBYwLs0aSMaNtXmkez3z0Sr773F4e+Usdv65u4JIpBVw2rYgF5fmUF2YyKS+TnPQU2/fjHLmZIEqB+pDHDQRaCcOVKY2w7qi56bt/pqffT7A1dapNpafdnHke0FNl9PTHgxpmg+uGnj+j7hllwp8/2/MO97sA9A34T/vAHyw1WSjJSackN53Zk3K5/sIJlBdkUV6YRXlBJqUFmaSn2HIWxjt5mal8/qa53H3lNH7zegNrdzTx/Rf34fOf/geYlZZMdnoKKUlCkggikBxyXyCmW7qFWWn8+t7LRv153UwQ4d7twf1ZQ5WJpG7gCUTuAe4BqKioGEl8p8woyaHfp6dFFAwg+J/mr49PPx+uzF+fQ8LWkUHnTzs26ElGVHdQPGeeP/05U5OTyElPJic9hez0FHLSUxiXmUpJbjrjc9PJy0yN6T8akzgm5mVw37UzuO/aGXT1DrC3uZP6tpM0tffQ2TtAV+8AXX0D+PyKzx/4AuVXxa/gVz3jC12syc1w56PczQTRAJSHPC4DGiMskxZBXQBUdRWwCgJjEOcS6P/cdvG5VDPGRKHs9BQWluezsDzf61BinptXUm8EZorIVBFJA24DVg8qsxq4UwIuBdpV9XCEdY0xxrjItRaEqg6IyP3AWgJTVR9W1R0icq9zfiWwhsAMphoC01w/dLa6bsVqjDHmTK5eBzHWbJqrMcaMzNmmudpifcYYY8KyBGGMMSYsSxDGGGPCsgRhjDEmLEsQxhhjwoqrWUwi0gIcGKWnKwaOjtJzjSWLe2xZ3GMrFuOO9pinqGrY3ZfiKkGMJhGpHmrqVzSzuMeWxT22YjHuWIw5yLqYjDHGhGUJwhhjTFiWIIa2yusAzpHFPbYs7rEVi3HHYsyAjUEYY4wZgrUgjDHGhGUJwiEi/yUiu0Vkq4g8KSJhF5MXkToR2SYim0XEs5UBRWSZiOwRkRoReSDMeRGR7zjnt4rIIi/iHBRTuYi8ICK7RGSHiPxzmDLXiEi78/5uFpEveBHrYMP9u0fp+31ByPu4WUQ6RORjg8pExfstIg+LSLOIbA85VigifxCRvc5twRB1z/q3MMYxx9TnyLBU1X4C3Ww3ACnO/a8DXx+iXB1Q7HGsycA+YBqBzZW2AHMHlbkReJbARnOXAhui4D2eBCxy7ucCb4aJ+xrgGa9jHem/ezS+32H+zzQRmPMede83cBWwCNgecuwbwAPO/QfC/U1G8rcwxjHHzOdIJD/WgnCo6jpVHXAeriewi120WgzUqGqtqvYBjwErBpVZAfxUA9YD+SIyaawDDaWqh1X1def+CWAXgf3H40HUvd+DXAfsU9XRupB0VKnqS0DboMMrgEec+48At4apGsnfgivCxRxjnyPDsgQR3l0Evg2Go8A6Ednk7IfthVKgPuRxA2d+0EZSxjMiUglcDGwIc/oyEdkiIs+KyIVjGtjQhvt3j+r3m8CujL8c4lw0vt8AEzSwwyTO7fgwZaL5fY/2z5FhubknddQRkT8C
E8Oc+pyqPuWU+RwwADw6xNMsVdVGERkP/EFEdjvfJMaShDk2eDpaJGU8ISI5wG+Aj6lqx6DTrxPoBukUkRuB3wIzxzrGMIb7d4/m9zsNuAX4TJjT0fp+Ryoq3/cY+RwZVkK1IFT1bao6L8xPMDl8ALgJeJ86HYVhnqPRuW0GniTQxB1rDUB5yOMyoPEcyow5EUklkBweVdUnBp9X1Q5V7XTurwFSRaR4jMM8QwT/7lH5fjuWA6+r6pHBJ6L1/XYcCXbTObfNYcpE3fseQ58jw0qoBHE2IrIM+DRwi6qeHKJMtojkBu8TGJDaHq6syzYCM0VkqvPt8DZg9aAyq4E7ndk1lwLtwea6V0REgB8Bu1T1W0OUmeiUQ0QWE/g/2jp2UYaNKZJ/96h7v0PczhDdS9H4fodYDXzAuf8B4KkwZSL5WxgzMfY5MjyvR8mj5QeoIdCXudn5Wekcnwysce5PIzBLYguwg0DXlFfx3khgFtC+YBzAvcC9zn0BHnTObwOqouA9voJA839ryPt846C473fe2y0EBvkuj4K4w/67R/v77cSVReADPy/kWNS93wQS2GGgn0Cr4MNAEfAcsNe5LXTKnvqbdB6f8bfgYcwx9Tky3I9dSW2MMSYs62IyxhgTliUIY4wxYVmCMMYYE5YlCGOMMWFZgjDGGBOWJQgTc0SkTESeclb53Cci33bmwBtjRpElCBNTnIu6ngB+q6ozgVlADvAfngZmTByyBGFizVuBHlX9MYCq+oCPA3eJSJaIfFBEvhcsLCLfE5EPOveznTX8N4rIGyKywjl+tjp1IlIsIjki8oqI3BB63Ln/89A9AUKe5xoReca5f7WIbBCRPOf4S85+ATtFZKWIJDnlOkPq/1lEnhGRTPnrfg19IfsIVInIT0Tkb53yd4uIOvFWBmMSkTnOYnzlzuNPiMh25+djzrFKEel2nrdWRL45Kv9aJqYl1GJ9Ji5cCGwKPaCqHSJyEJgxTN3PAc+r6l0S2MjlNQks4DicVOBnwPdVdV3oCRG5CJh3tspOmW8DN6pqu7OyxWJgLnAA+D3wTuDxkDpvB/IILNnRDSx0jtcB16rqUedxsHwGgSukT1uvSERKCSyB/V5VrReRS4APAUsIXP29QUReBI4RWA58oYhMIHCF7ycjeG9MHLMWhIk1QvjVOoc6HuoG4AER2Qz8CcgAKpxz7wl+SwfeM6jeD4BJqvrzMM/5FeDfzvKakwks+fyIOgu0OV7TwB4GPgJLNlxx6hcJfOp/DvjPYX6fUPcR2DOhO+RYDoHk8ydV3eEcuwJ4UlW7NLBI3xPAlc656c7v/yaBhGYSnCUIE2t2AFWhB0RkHIEVPfcNU1eAd6nqQuenQlV3Oed+FTwO/GpQvb3AFhG5a9Dxy4FOAmvqDGU28BHgH0SkJOT44GQW+vh2AgmsaZjfJ2icU+ehQcfLga8C14rIHOdYuOWxg/Y5v/8k4PZgl5RJXJYgTKx5DsgSkTsBRCQZ+G/gJzrE6pkh1gIfDVm99OIIX/M/gE8A/+J0vwR9ERhuD+fnVXU1gdZA6Lfyxc4KpEkEWiwvO8eTCIypfCPC2HDKf0cDO6qF2qWqvwA+Cjzk/N4vAbc64zXZwDuAPw+q1wv4gLB7QJvEYQnCxBQNrC75DuDdIrKXQHdID/DZkGLvFJGXReRlAn37nxWRicCXCYwnbHUGcL88gtdtBb4EfDfk8AZVHa7VEqz/U6BIApvyAPwF+BqBZZ73E9gTACATeFxVj0caG4FWQbjur+BrvwjsBv5RA1u+/gR4jcBufj9U1TecosEupu3AC6q6dQQxmDhkq7mauCciPwG+qKp1HocCBGY3AZ9U1Zu8jsWYs7EWhEkEvyEwS8cYMwLWgjDGGBOWtSCMMcaEZQnCGGNMWJYgjDHGhGUJwhhjTFiWIIwxxoRlCcIYY0xY/x9+0KTB+aay8wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Распределение значений отзывов критивов спортивных игр не является нормальным\n"
     ]
    }
   ],
   "source": [
    "# Critic scores of sports games only; rows without a score are dropped.\n",
    "df_sport_games_critics = df_videogames.loc[df_videogames['Genre'] == 'Sports', 'Critic_Score'].dropna()\n",
    "\n",
    "# Kernel density estimate of the scores, for a visual look at the distribution shape.\n",
    "df_sport_games_critics.plot.density()\n",
    "plt.xlabel('Оценки критиков')\n",
    "plt.show()\n",
    "\n",
    "# Shapiro-Wilk test; H0: the scores are drawn from a normal distribution.\n",
    "alpha = 0.05\n",
    "stat, p = scipy.stats.shapiro(df_sport_games_critics)\n",
    "\n",
    "# Reject H0 when the p-value falls below the significance level alpha.\n",
    "print(\n",
    "    'Распределение значений отзывов критивов спортивных игр не является нормальным'\n",
    "    if p < alpha\n",
    "    else 'Распределение значений отзывов критивов спортивных игр нормальное'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Попробуем привести выборку к нормальному распределению с помощью бутстрапа."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAEGCAYAAABvtY4XAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deXhcZ3n38e892nfJkrxKlmx5jY3lRXFsx2QlBJIQtgAJZSmBNwUCFOgCpe0LLS2FLrzQ0lLSEAhLQ2gWQgNJgCSObeJNsR3vlvdYtrVasjZLGs3c7x8zcmR5JI2kOXNmuT/XNZdnzpzlJ3l06+g5z3keUVWMMcYkHo/bAYwxxjjDCrwxxiQoK/DGGJOgrMAbY0yCsgJvjDEJKtXtAEOVlJRoZWWl2zGMMSZuvPLKKy2qWhrqvZgq8JWVldTW1rodwxhj4oaInBrpPWuiMcaYBGUF3hhjEpQVeGOMSVAx1QZvjDFu8Hq91NfX09vb63aUEWVmZlJWVkZaWlrY21iBN8Ykvfr6evLy8qisrERE3I5zBVWltbWV+vp65syZE/Z21kRjjEl6vb29FBcXx2RxBxARiouLx/0XhmMFXkQWisjuIY8OEfmsU8czxpjJiNXiPmgi+RxrolHVw8ByABFJAc4ATzp1PGNiyYGzHbxU18wbZhWwfn6J23FMkopWE83NwDFVHbFDvjGJ4qndZ7jj3zbxjWcP8YHvb+Mbzx5yO5KJE88++ywLFy5k3rx5fP3rX5/0/qJV4O8GHgn1hojcJyK1IlLb3NwcpTjGOONkSzdfeHwPV1dOYetf3Mw9q8v57oZjPLe/we1oJsb5fD7uv/9+nnnmGQ4cOMAjjzzCgQMHJrVPxwu8iKQDdwL/E+p9VX1AVWtUtaa0NORwCsbEja8+fYC0FA/fvnsF0wsy+du3L2XhtDy+9uuD9A/43Y5nYtj27duZN28ec+fOJT09nbvvvpunnnpqUvuMRjfJtwI7VbUxCscyxjX7z17g+UNN/OmbFzC9IBOAtBQPX3zrIj7ywx08uaue91092+WUZix/87/7OXC2I6L7vGpmPl9+25JR1zlz5gzl5eWXXpeVlbFt27ZJHTcaTTT3MELzjDGJ5KHNJ8lJT+GDaysvW37DwlIWTc/j4ZdPYXMgm5GE+mxMtmePo2fwIpIN3AL8kZPHMcZt3X0DPLPvHHdWz6Qg6/I7DUWED62t5EtP7mXX6XZWzi5yKaUJx1hn2k4pKyvj9OnTl17X19czc+bMSe3T0TN4Ve1R1WJVveDkcYxx27P7Gujp9/HuVWUh339b9QzSUz38cvfZKCcz8eLqq6/myJEjnDhxgv7+fn72s59x5513TmqfdierMRHwi91nqCjOpqYi9Nl5XmYaNy2cyq/2nsPnt2Yac6XU1FS+853vcOutt7J48WLe+973smTJ5P6asLFojJmk7r4Bth5v5d5r54zaZvq26pk8u7+BbcdbWTfPbn4yV7rtttu47bbbIrY/O4M3ZpK2Hm/F61OuWzB6N9+bFk0lM81jfeJN1FiBN2aSNh1pISsthZrK0S+eZqWnsH5eCc8farLeNCYqrMAbM0kb65pZM3cKGakpY65746Kp1Ldd5EhTVxSSmfGI9V+6E8lnBd6YSTh9vofjLd1jNs8MumnRVABeONTkZCwzTpmZmbS2tsZskR8cDz4zM3Nc29lFVmMmYeORwPhJb5wfXoGfUZDFVTPyeeFgEx+/vsrJaGYcysrKqK+vJ5bHwxqc0Wk8rMAbMwkb65qZVZhFVWlO2NvctGgq333pGB29XvIzw59+zTgnLS1tXDMlxQtrojFmggZ8fl4+2sp1C0rGdUv5+vkl+PzK1mOtDqYzxgq8MRO2+3Q7nX0DXBdm88ygFbMLyUpL4WUr8MZhVuCNmaCNdc14hHHftJSRmsLqOVPYfLTFoWTGBFiBN2aCXjrSwvLywisG
FwvH+nklHG3qouHC+CZRNmY8rMAbMwFt3f3sqW8Pu3vkcNcGz/p/b2fxxkFW4I2ZgN8fa0GVCRf4RdPzKM5JtwJvHGUF3pgJ2FjXTH5mKtVlhRPa3uMR1s0rYfPRlpi9ucbEPyvwxoyTqrKxroX180tI8Ux8xp3184pp6uzjqA1bYBxiBd6YcTrS1EVDR++4u0cOt64q0A5vvWmMU6zAGzNOG+sCt7NPtP19UPmUbCqKs9l8xAq8cYYVeGPGaeORFuZNzWVmYdak97WuqoTtJ84z4PNHIJkxl3O0wItIoYg8JiKHROSgiKx18njGOK3X62Pb8dZJN88MWldVTGffAPvOdkRkf8YM5fQZ/LeBZ1V1EVANHHT4eMY4avuJ8/QN+HnjgshMubdmbjEALx+zZhoTeY4VeBHJB64Dvg+gqv2q2u7U8YyJho11zaSnelgzpzgi+yvNy2DhtDy22Lg0xgFOnsHPBZqBH4jILhF5UESuGFNVRO4TkVoRqY3lsZiNgcD0fKsrp5CVPvbsTeFaW1XMjpPn6RvwRWyfxoCzBT4VWAl8V1VXAN3AF4evpKoPqGqNqtaUlkamXdMYJzRc6OVwYyfXRah5ZtDaqmJ6vX5ePX0hovs1xskCXw/Uq+q24OvHCBR8Y+LS4OxNk+0eOdyaOcWIWDu8iTzHCryqNgCnRWRhcNHNwAGnjmeM01463MzUYJt5JBVkp7F0ZoGND28izuleNJ8Gfioie4DlwNccPp4xjhjw+dl0pJkbFpaOa/amcK2rKmbXa21c7Ld2eBM5jhZ4Vd0dbF9fpqrvUNU2J49njFN2nW6no3eAGxZOdWT/a6uK8fqU2lPnHdm/SU52J6sxYdhwuIkUj7B+fmQvsA66unIKqR6xZhoTUVbgjQnDhsPNrKooIj9z/LM3hSMnI5Xq8kIr8CairMAbM4amjl72n+3ghoXOduNdV1XM3vp2Onq9jh7HJA8r8MaMYUNw9MgbFjjT/j5obVUxfoUdJ6wd3kSGFXhjxvDioSam5WeweEZku0cOt3J2EempHmumMRFjBd6YUfR6fWw43Mybr5ruSPfIoTLTUqipKLICbyLGCrwxo9hY18xFr49bl0yPyvHWVRVz8FwH57v7o3I8k9iswBsziuf2N1KQlcY1c6dE5Xhrg9P4bT1uZ/Fm8qzAGzMCr8/P84cauXnxVNJSovOjsqysgOz0FBuXxkSEFXhjRrD9xHnae7xRa54BSEvxsHrOFBsf3kSEFXhjRvD0nrNkp6dEbHq+cK2rKuZYczeNHb1RPa5JPFbgjQmh1+vj6T3neMuS6RGd3CMc64Lt8HYWbybLCrwxIbx4qInO3gHesWJW1I+9eEY+BVlp1g5vJs0KvDEhPLHrDKV5GVw7z5nBxUaT4hHWzJ1i/eHNpFmBN2aYtu5+Nhxu4u3VM0nxOHtz00jWVZVQ33aR0+d7XDm+SQxW4I0Z5sldZ/D6lHeujH7zzKC1VcWATeNnJscKvDFDqCo/3XaK5eWFLJlZ4FqO+VNzKclNtwutZlKswBszxJbjrRxr7uYDaypczSEirK0q4eVjraiqq1lM/LICb8wQP936GoXZadyxbIbbUVhXVUxTZx/HmrvdjmLilKMFXkROisheEdktIrVOHsuYyWrs6OW5/Q28Z1UZmWnR7fseyrpgO/wWa4c3ExSNM/gbVXW5qtZE4VjGTNiPt5zCp+p688yg2VOymVWYZd0lzYRZE40xBO5c/em2U9yyeBoVxTluxwEG2+GL2XK8Fb/f2uHN+Dld4BX4jYi8IiL3hVpBRO4TkVoRqW1ubnY4jjGhPbnrDG09Xu5dP8ftKJdZO7eY9h4vBxs63I5i4pDTBf5aVV0JvBW4X0SuG76Cqj6gqjWqWlNaGt1BnYyBQNfIhzafYMnMfK6ZE51x38O19lI7vDXTmPFztMCr6tngv03Ak8BqJ49nzERsPtrCkaYu7r12juPT8o3XzMIs5pTkWIE3E+JYgReRHBHJ
G3wOvBnY59TxjJmo728+QUluBndUu981MpS1VcVsO3GeAZ/f7Sgmzjh5Bj8N2CwirwLbgV+p6rMOHs+YcTvR0s2Gw818YM1sMlLd7xoZyrqqYrr6Bth75oLbUUycSXVqx6p6HKh2av/GRMKjO06T4hHev3q221FGtGbu4Lg0rayYXeRyGhNPrJukSVpen5/Hd9Zz48KpTM3PdDvOiEpyM1g0Pc/a4c24WYE3SevFQ000d/bxvqvL3Y4ypjVzi9lx8jx9Az63o5g4YgXeJK3HXqmnNC+DGxfGfvfcdVXF9A342fVau9tRTByxAm+SUlffABvqmrn9DTNITYn9H4Nr5hbjEesPb8Yn9j/ZxjjgxUNN9A/4eevS6W5HCUtBVhpLZxVYgTfjYgXeJKVn9zVQkptBTWVs3bk6mrVzi9l1uo1er7XDm/BYgTdJ52K/jxcONfGWpdNcm3N1IlZVFOH1qfWHN2GzAm+SzpbjLVz0+rh1SXw0zwxaWRHoA7/zVJvLSUy8sAJvks7GuhYyUj1cHUfNMxDoDz97SjY7X7MCb8JjBd4knc1HW1g9Z0pMzNo0XitnF7LztXabp9WExQq8SSrnLlzkaFMX182P/b7voaysKKK5s4/6totuRzFxwAq8SSqbjgTmN10/v8TlJBOzMjgWjTXTmHBYgTdJ5fdHWy6N7RKPFk3PIystxe5oNWGxAm+Syo4T57lm7pSYm9gjXKkpHpaVFdgZvAmLFXiTNM62X+TshV5qKuJ7yN2VFUUcONthNzyZMVmBN0njlWD/8VXxXuBnFzHgV/bU2w1PZnRW4E3SeOVUG1lpKSyeke92lElZMbsQsAutZmxW4E3S2PlaG9XlBaTFweiRoynJzaCiONvuaDVjiu9PujFh6ukfYP/Zjrhvnhm0vLyQV+utJ40ZneMFXkRSRGSXiDzt9LGMGcne+gv4/HqpH3m8qy4rpLGjj4YLvW5HMTEsrAIvIo+LyO0iMpFfCH8MHJzAdsZEzOAIjMvKCl1OEhnV5QUAdhZvRhVuwf4u8H7giIh8XUQWhbORiJQBtwMPTjCfMRGx/2wH0/IzKM3LcDtKRCyZWUCKR9hjBd6MIqwCr6q/U9U/AFYCJ4HfisjLIvIREUkbZdNvAX8O+EdaQUTuE5FaEaltbm4eR3Rjwrf3zAXeMKvA7RgRk5mWwsJpebx62rpKmpGF3eQiIsXAHwIfA3YB3yZQ8H87wvp3AE2q+spo+1XVB1S1RlVrSkvjcwAoE9t6+gc41tzFkpmJU+ABqssL2VPfjt9vI0ua0MJtg38C2ARkA29T1TtV9VFV/TSQO8Jm1wJ3ishJ4GfATSLykwhkNmZcDp7rQBWWJtAZPEB1WQEdvQOcbO12O4qJUeGewT+oqlep6j+o6jkAEckAUNWaUBuo6l+oapmqVgJ3Ay+o6gciEdqY8dh3pgOApbPi+wan4arLAxeM7Y5WM5JwC/zfhVi2JZJBjHHKvjMXKMlNZ3p+pttRImr+1Fwy0zzWk8aMKHW0N0VkOjALyBKRFcDgEHz5BJprwqKqG4ANE4tozOTsPXOBJTML4nYEyZGkpnhYOrOAV09bgTehjVrggVsJXFgtA745ZHkn8CWHMhkTMb1eH0eaurh58VS3oziiuryQn2w9hdfnj/shGEzkjVrgVfVh4GERebeqPh6lTMZEzNGmLnx+jfsBxkayrKyAvgE/hxs6E+4ispm8sZpoPqCqPwEqReTzw99X1W+G2MyYmFHX2AkQtzM4jWX5kAutVuDNcGP9TZcT/DcXyAvxMCamHW7sJD3FQ0Vxztgrx6HZU7IpzE6zdngT0lhNNN8L/vs30YljTGQdbuhkbmlOwrZPiwjLymxkSRNauDc6/aOI5ItImog8LyItImJ92k3Mq2voTNjmmUHVZQXUNXbS0z/gdhQTY8I9rXmzqnYAdwD1wALgzxxLZUwEdPR6OXuhlwUJX+AL8WtgQDVjhgq3wA8OKHYb8IiqnncojzER
cyR4gXXhtMQu8MsGhw62dngzzFj94Af9r4gcAi4CnxSRUsBmGjAx7VBDsMAn+Bn81LxMZhZk8qoNWWCGCXe44C8Ca4EaVfUC3cDbnQxmzGTVNXSSk57CrMIst6M4bllZoY0Nb64Q7hk8wGIC/eGHbvOjCOcxJmION3ayYHpewg1REEp1eSHP7m+grbufopx0t+OYGBFuL5ofA/8MrAeuDj5CjiJpTCxQVQ4nQQ+aQdVlgXb4PWesmca8Ltwz+BrgKlW1mQVMXGju6qOtx8uCBL/AOmhp2esXWq9fYBPnmIBwe9HsA6Y7GcSYSKpr6AISvwfNoPzMNKpKc6wnjblMuGfwJcABEdkO9A0uVNU7HUllzCQdDnaRTPQ+8EMtLy9iw+EmVDUprjuYsYVb4L/iZAhjIu1wQwcluemU5Ga4HSVqls8u5PGd9dS3XaR8StjTNZgEFm43yZeAk0Ba8PkOYKeDuYyZlMONXUnT/j5oeVlgZMnd1kxjgsLtRfN/gMeA7wUXzQJ+4VQoYybD71eONHYmXYFfNCOPjFSPFXhzSbgXWe8HrgU6AFT1CJCYU+SYuHem/SI9/b6k6SI5KC3Fw9JZBVbgzSXhFvg+Ve0ffBG82WnULpMikiki20XkVRHZLyI25LCJisEhCpLpAuug5eWF7DtzAa/P73YUEwPCLfAviciXCEy+fQvwP8D/jrFNH3CTqlYDy4G3iMiaiUc1JjyDszjNn5rrcpLoW15eeGkKP2PCLfBfBJqBvcAfAb8G/mq0DTSgK/gyLfiwG6WM4w43dFJWlEVeZtrYKyeYwSn8dlkzjSH8XjR+AhdVP6mqd6nqf4VzV6uIpIjIbqAJ+K2qbptcXGPGVtfYmTQ3OA1XVpRFcU46u1+zAm/GKPAS8BURaQEOAYdFpFlE/m84O1dVn6ouB8qA1SKyNMQx7hORWhGpbW5unsjXYMwlXp+fY81dSdn+DoEp/JaXF7L7dJvbUUwMGOsM/rMEes9crarFqjoFuAa4VkQ+F+5BVLUd2AC8JcR7D6hqjarWlJbaGBpmck62dOP1adKewUOgmeZYczcdvV63oxiXjVXgPwTco6onBheo6nHgA8H3RiQipSJSGHyeBbyJwF8BxjjmUg+aJC7w1cF2+D2nbWTJZDdWgU9T1ZbhC1W1mden8RvJDOBFEdlD4M7X36rq0xOLaUx46ho7SfEIc0tz3I7imsECb800ZqyxaPon+B6qugdYMe5ExkzC4YZOKouzyUxLcTuKawqy0phbmmM3PJkxC3y1iISaql2ATAfyGDMpdY2dXDUz3+0YrlteXsjGuhYbWTLJjdpEo6opqpof4pGnqsnXydjEtIv9Pk6d72HhNCvwK8oLaenq40z7RbejGBeFe6OTMTHvSFMnqrBwevLdwTrc6+3w1kyTzKzAm4Rx2HrQXLJoej7pqR674SnJWYE3CaOusZP0VA8Vxcnbg2ZQeqqHpTPz7Qw+yVmBNwnjcGMX86fmkuKxi4oAK2YXsffMBfoHbGTJZGUF3iSMuobkHYMmlFUVRfQN+Nl/1m54SlZW4E1CuNDjpaGjN2nHoAllVUURAK+cshuekpUVeJMQDjYEbtdYaAX+kmn5mZQVZVmBT2JW4E1C2H82UOCX2E1Ol6mpKKL2VBthjO5tEpAVeJMQDpztoDQvg6l5doP1UKsqp9Dc2Ud9m93wlIyswJuEsP/sBa6aYWfvw62aHWiHrz113uUkxg1W4E3c6xvwcbSpy5pnQlg4PY+8jFRqT1o7fDKyAm/i3pHGLgb8ypKZBW5HiTkpHmH57EK70JqkrMCbuDfYz9tGkQxtVUURhxs7bYanJGQF3sS9A2c7yM1IpWJKtttRYlJNxRRUYZeNS5N0rMCbuLf/bAeLZ+ThsSEKQlo+uxCP2A1PycgKvIlrPr9y8FyH9aAZRW5GKoum5/OK9aRJOlbgTVw71txFd7+PZWWFbkeJaTWVRex6rZ0Bnw08lkyswJu4Njje+fLZ
VuBHs6qiiJ5+H4eCY+ab5OBYgReRchF5UUQOish+Efljp45lkteu0+3kZ6Yyx8aAH9XVlVMA2Hq81eUkJpqcPIMfAP5EVRcDa4D7ReQqB49nktDu0+1UlxfaBdYxzCzMoqI4m63HrR0+mThW4FX1nKruDD7vBA4Cs5w6nkk+Pf0DHG7oYHm5Nc+EY+3cYrafaMXnt4HHkkVU2uBFpBJYAWwL8d59IlIrIrXNzc3RiGMSxN76C/gVK/BhWjO3mI7eAQ6e63A7iokSxwu8iOQCjwOfVdUrPlmq+oCq1qhqTWlpqdNxTAIZnG/UCnx4rplr7fDJxtECLyJpBIr7T1X1CSePZZLPjpPnqSzOpjg3w+0ocWFGQRaV1g6fVJzsRSPA94GDqvpNp45jkpPPr2w/cZ5r5hS7HSWurJlbzDZrh08aTp7BXwt8ELhJRHYHH7c5eDyTRA41dNDRO3Cp2cGEZ21VMZ3WDp80Up3asapuBqzvmnHEtmAzwzVz7Qx+PAb/4tl6vJWls2x45URnd7KauLTtRCtlRVnMKsxyO0pcmV6QyZySHH5/tMXtKCYKrMCbuOO39vdJuW5+CVuPn6fX63M7inGYFXgTdw6c66Ctx8vaKivwE3H9wlIuen02jV8SsAJv4s6Gw00AXL/A7puYiDVzi0lP8fBSXZPbUYzDrMCbuLPhcDNLZ+VTmmf93yciOz2V1XOm8FKd3Tme6KzAm7hyocfLztfauGHBVLejxLXrF5RS19jF2faLbkcxDrICb+LKpqPN+BVuXGTNM5Nxw8LA98/O4hObFXgTV54/2ERBVhrVNoPTpMybmsvMgsxL1zNMYrICb+JG34CP3x1o5NYl00hNsY/uZIgINyyayqYjLdZdMoHZT4mJG5vqWujsG+C2N8xwO0pCeMuS6fT0+9h8xG56SlRW4E3c+NXecxRkpXHtvBK3oySENXOLyc9M5dn9DW5HMQ6xAm/iQq/39eaZNGueiYj0VA9vWjyN3x1sxOvzux3HOMB+Ukxc+O2BRjr7Bnhb9Uy3oySUNy+ZTnuPl+0nbIz4RGQF3sSFn9eeZlZhFtdWWfNMJF2/oJTMNA/P7DvndhTjACvwJuadab/I5qMt3LWqDI/HRqCOpKz0FG65ajpP7zlH/4A10yQaK/Am5j1WW48q3LWqzO0oCeldK2bR3uPlResTn3CswJuY5vX5+e/tp3jj/BLKp2S7HSchvXF+CSW56Ty584zbUUyEWYE3Me3Xe8/R2NHHvdfOcTtKwkpN8XBn9SyeP9RIe0+/23FMBFmBNzHthy+fZE5Jjg0N7LB3rZyF16f88tWzbkcxEeRYgReRh0SkSUT2OXUMk9h2n25n12vtfHhthV1cddiSmfksnZXPT7aeQlXdjmMixMkz+B8Cb3Fw/ybBfe+lY+RlpnJXTbnbURKeiPDBNRXUNXaxzfrEJwzHCryqbgTsk2ImpK6xk2f2NfCRdZXkZqS6HScp3Fk9i4KsNH685ZTbUUyEuN4GLyL3iUitiNQ2N9vY1CbgP148SnZ6Ch+xi6tRk5WewntWlfHc/gYaO3rdjmMiwPUCr6oPqGqNqtaUltqFNAMnW7r55atn+eCaCopy0t2Ok1Q+uLYCvyoP/f6E21FMBLhe4I0Z7rsbjpGa4uGjb7Sz92irKM7h9mUz+enW17jQ43U7jpkkK/AmptS39fD4znruubqcqXmZbsdJSp+4voquvgF+tOWk21HMJDnZTfIRYAuwUETqReSjTh3LJI5v/e4IHo/w8Ruq3I6StK6amc+NC0v5wcsn6e4bcDuOmQQne9Hco6ozVDVNVctU9ftOHcskhrrGTp7YWc+H11YwoyDL7ThJ7dM3z+d8dz8Pbba2+HhmTTQmZvzzc4fJSU/lkzfMcztK0ls5u4hbrprGAxuPc77bhi+IV1bgTUzY9VobvznQyH3XzbWeMzHiz29dSHf/AP/x4lG3o5gJsgJvXKeqfOPZQ5TkpnPveus5EyvmT8vj
rlVl/GjLKU6f73E7jpkAK/DGdc/tb2Dr8fN85ub55NhdqzHlc7csIMUjfPXpA25HMRNgBd64qtfr46tPH2TR9Dzev3q223HMMDMKsvjMzfP5zYFGXjjU6HYcM05W4I2r/vOlY5xpv8hX7lxCaop9HGPRR9fPYd7UXL78y/30en1uxzHjYD9RxjUnWrr57oZj3LFsBmvmFrsdx4wgPdXD3759CafPX+T//bbO7ThmHKzAG1f4/cqfP/Yq6ake/ur2q9yOY8awrqqEe1bP5oFNx9lx0gaJjRdW4I0rHt5ykh0n2/jy25YwvcCGJIgHf3X7YsqLsvn8z3fTZXe4xgUr8Cbq9p+9wNefOcSNC0t598pZbscxYcrJSOVf3ltNfdtF/voX+2zmpzhgBd5E1YWLXj7xk50UZafzT++pRsSm4osnV1dO4XNvWsCTu87w8Msn3Y5jxmAF3kSN1+fnM4/s4mz7Rf79D1ZSkpvhdiQzAZ+6cR5vWjyVv/vVQV4+2uJ2HDMKK/AmKvx+5QuP7eGluma++o6lrKoocjuSmSCPR/jm+5YztzSH+378CvvOXHA7khmBFXjjuP4BP5/7+W6e2HWGP7llAffYDU1xLz8zjYfvXU1BVhoffmg7hxs63Y5kQrACbxzV1t3PRx/ewVO7z/Jnty7kUzfZSJGJYkZBFj/66GpSU4T3fm8LO19rczuSGcYKvHHM1uOt3Pavm9h6vJV/vGsZ9984zy6qJpiq0lwe+/g6CrPTeP9/beUXu864HckMYQXeRFxTRy+ff3Q3dz+wlYxUD0984lreW1PudizjkPIp2Tz28XUsKyvks4/u5stP7bMhDWKEDd1nIqa+rYcHN53g0R2n8fmVT95Qxf03zrMRIpNAaV4GP/3YNXzjmUM8uPkEL9U187V3voF180rcjpbUJJZuVqipqdHa2lq3Y5hx6Bvw8eKhJp7YeYYXDjUBcOfymXzmpvlUluS4nM644eVjLXzpib2cbO3hTYun8flbFnDVzHy3YyUsEXlFVWtCvudkgReRtwDfBlKAB1X166OtbwU+ugZ8fs739NPW7cUjkJGaQkaah4xUT+B5qgeP5/U28/4BP+e7+zna1MXBcx1sOd7KtuOtdPf7KM3L4J0rZpQktXAAAAr9SURBVPHhdZXMKrT5VJNdr9fHg5uO872Nx+nsHWD9vBLuXl3OmxZPIzMtxe14CcWVAi8iKUAdcAtQD+wA7lHVEWcOsAI/Ob1eHx0XvVy46KX9opfWrn5au/to6Qz829rVT0tXH63d/bR29dHW4x1zn2kpQkZqCn5Vevovb1etLM5m/fwSbrlqOtdWFdtwv+YKF3q8PLzlJI/uOM2Z9oukp3q4Zs4UVs4uYsG0PKqm5lCSm0FRdjopHrsAPxFuFfi1wFdU9dbg678AUNV/GGmbiRb4O/5tE71e/6WxMS77ipQrlg1fb+i3QINLL1s27Fs09HsWzj4uP/bwLUdab5SMIb7OvgE//QN+RlKQlUZxbjoluRmU5KZTnJNBcW46xbkZTMlOR1H6vH76Bvz0en30DfjpGwj+6/UjAoVZaRTmpFNVksOC6Xl2J6oJm8+vbDnWyouHm9h0pJkjTV2XfaZFICc9lbQUIS3FE3wInlC9rsJYFKq3Viz/+ijKTufnH187oW1HK/BOXv2aBZwe8roeuGb4SiJyH3AfwOzZE7sBZl5pLl5f8NMil/0zeIwQyy5fb+gHQq54AhJ8MXy7y5fJlcsu+1RNcB8hMg5fJz3FQ35WGgVZaeRnpVE4pKAXZaeTnmpn18Y9KR5h/fwS1s8PXHS92O/jWHMXJ1q6Od/dz/nufjp7Bxjw+/H6/PQPKF6fn+Gnn6FOSK9YEuKcVUMtjCH5mWmO7NfJAh/qF+aV/xeqDwAPQOAMfiIH+tbdKyaymTHGJVnpKSydVcDSWQVuR0loTp7W1QNDOz+XAWcdPJ4xxpghnCzwO4D5IjJHRNKBu4FfOng8Y4wxQzjWRKOq
AyLyKeA5At0kH1LV/U4dzxhjzOUcvcVQVX8N/NrJYxhjjAnNulYYY0yCsgJvjDEJygq8McYkKCvwxhiToGJqNEkRaQZORWBXJUCszwYcDxkhPnJaxsiJh5yW8XIVqloa6o2YKvCRIiK1I43NECviISPER07LGDnxkNMyhs+aaIwxJkFZgTfGmASVqAX+AbcDhCEeMkJ85LSMkRMPOS1jmBKyDd4YY0zinsEbY0zSswJvjDEJKq4LvIgUishjInJIRA4Gpwkc+n6RiDwpIntEZLuILI1yvoUisnvIo0NEPjtsHRGRfxWRo8GcK2Mw4yIR2SIifSLyp9HMN86cfxD8Hu4RkZdFpDoGM749mG+3iNSKyPpYyzhk3atFxCcid0UzY7g5ReQGEbkwZJ3/G2sZh+TcLSL7ReSlaGZEVeP2ATwMfCz4PB0oHPb+PwFfDj5fBDzvYtYUoIHATQlDl98GPENgBqw1wLYYzDgVuBr4e+BPY+D/faSc64Ci4PO3xuj3MpfXr30tAw7FWsYh771AYDTYu2L0//sG4Gk3s4WRsRA4AMwOvp4azVxxewYvIvnAdcD3AVS1X1Xbh612FfB88P1DQKWITItq0NfdDBxT1eF36r4d+JEGbAUKRWRG9OMBI2RU1SZV3QF43Yl1hZFyvqyqbcGXWwnMIuaWkTJ2afAnHcgh5AyiUTPSZxLg08DjQFN0I4U0Ws5YMVLG9wNPqOprEPhZimaouC3wwFygGfiBiOwSkQdFJGfYOq8C7wIQkdVABe790N8NPBJieajJyWdFJdGVRsoYa8LJ+VECfxm5ZcSMIvJOETkE/Aq4N6qpLhcyo4jMAt4J/GfUE4U22v/3WhF5VUSeEZEl0Qw1zEgZFwBFIrJBRF4RkQ9FM1Q8F/hUYCXwXVVdAXQDXxy2ztcJfHN3Ezgj2QUMRDUlEJyy8E7gf0K9HWJZ1M/qxsgYM8LJKSI3EijwX4hWrmHHHzWjqj6pqouAdwBfjWa2QWNk/BbwBVX1RTfVlcbIuZNAk0g18G/AL6KZbdAYGVOBVcDtwK3AX4vIgmhlc3RGJ4fVA/Wqui34+jGGFXhV7QA+AoGLmcCJ4CPa3grsVNXGEO/FyuTko2WMJaPmFJFlwIPAW1W1NarJXhfW91JVN4pIlYiUqGq0B88aLWMN8LPAjwwlwG0iMqCqbhTQEXMGf74Hn/9aRP4jBr+X9UCLqnYD3SKyEagG6qIRLG7P4FW1ATgtIguDi24mcDHjkmAvm/Tgy48BG4d+KKLoHkb+E/OXwIeCvWnWABdU9Vz0ol0yWsZYMmJOEZkNPAF8UFWj8gM0gtEyzguebBDsMZUOuPGLaMSMqjpHVStVtZLAidMnXSruMPr3cvqQ7+VqAvUspr6XwFPAG0UkVUSygWuAg1FL5vbV58k8gOVALbCHwJ9nRcDHgY8H318LHAEOEfjBL3IhYzaBD13BkGVDMwrw78AxYC9QE4MZpxM4E+kA2oPP82Mw54NAG7A7+KiNwYxfAPYH820B1sdaxmHr/hCXetGE8b38VPB7+SqBi+rrYi1j8PWfETj53Ad8Npr5bKgCY4xJUHHbRGOMMWZ0VuCNMSZBWYE3xpgEZQXeGGMSlBV4Y4xJUFbgTdSJSJmIPCUiR0TkmIh8e8j9CsaYCLECb6IqeGPKE8AvVHU+gbE6cgmMVGmMiSAr8CbabgJ6VfUHABoY7+RzwL0iki0ifygi3xlcWUS+IyJ/GHyeIyIPiciO4ABzbw8uH22bkyJSIiK5IvJ7EXnz0OXB5z8RkX3DgwbH8X46+Px6EdkmIgXB5RslMNfAARH5TxHxBNfrGrL9JhF5WkSyhowZ3i8ie4PPa0TkhxIcb11EPiYiGsxbOZhJRBYHB9QqD77+vIjsCz4+G1xWKSIXg/s9LiL/HJH/LRPX4nksGhOflgCvDF2gqh0i8howb4xt/xJ4QVXvFZFCYLuI/C6MY6YBPyYwMN1vhr4hIm8ARp0IJrjOt4Hb
VPVC8O741QSGoz4FPEtg1NLHhmxzO1BAYOiJiwTuukZETgI3anC8lOC+EJFMAndAXjacbHBkx58B71fV0yKyisD4StcQuAt6mwQmkWgjMFztcgkMib0fcGVyFhM77AzeRJsQerTMkZYP9Wbgi8HRQTcAmcDs4HvvGzxLBt43bLv/Amao6k9C7PPvgC+PcsyZBIYdflhVhw4Ct11Vjwf/AnkEuDQzU7AZ6i+Br43x9Qx1P4EJbC4OWZZL4JfHBlXdH1y2HnhSVbtVtYtAc9cbg+9VBb/+OgK/kEySswJvom0/gdEKL5HA5C3lBMbjGY0A71bV5cHHbFUdHLjp0cHlwKPDtjsCvCoiw8deXwd0ERjLZCSLgE8CfyQipUOWD/9lNPT1PQR+ATWM8fUMyg9u871hy8uBfwBuFJHFwWWhhpcedCz49c8A7hls0jHJywq8ibbngWwJTnwgIinAvwA/VNWeMbZ9Dvj0kBEEV4R5zL8HPg/8uVw+o9dXgLHm8XxBVX9J4Gx86FnxahGZE2x7fx+wObjcQ+Cawj+GmY3g+v+qqv3Dlh9U1f8mMJfB94Jf90bgHcHrFTkEJubYNGy7PsBHYPA9k8SswJuo0sDodu8E3iMiRwg0J/QCXxqy2rtEZLOIbCbQtv0lEZlOYHKMNGBP8AJk2JNlaGBs+L8lMDHEoG2qOtZfDYPb/wgoFpHbgou2EJhQZh+BOQaeDC7PAh7TK6ePHI0AoZqPBo/9EoERUT+hqjsJjPC4HdgGPKiqu4KrDjbR7ANeVNU948hgEpCNJmlinoj8EPiKqp50OQoQ6F1DYPLxO9zOYsxo7AzexIPHCfQSMcaMg53BG2NMgrIzeGOMSVBW4I0xJkFZgTfGmARlBd4YYxKUFXhjjElQ/x/pYoFtSyuMZQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Распределение значений отзывов критивов спортивных игр нормальное\n"
     ]
    }
   ],
   "source": [
    "# Bootstrap the mean critic score: draw 1000 resamples with replacement, each the\n",
    "# same size as the original sample (frac=1), and keep the mean of every resample.\n",
    "# A fixed-seed generator makes the resamples, and therefore the plot and the test\n",
    "# outcome below, reproducible after Restart & Run All.\n",
    "bootstrap_rng = np.random.RandomState(42)\n",
    "sport_games_critics_bootstrap_means = [\n",
    "    df_sport_games_critics.sample(frac=1, replace=True, random_state=bootstrap_rng).mean()\n",
    "    for _ in range(1000)\n",
    "]\n",
    "df_sport_games_critics_bootstrap_means = pd.DataFrame(sport_games_critics_bootstrap_means)\n",
    "df_sport_games_critics_bootstrap_means.plot(kind='density')\n",
    "plt.xlabel('Оценки критиков')\n",
    "plt.show()\n",
    "\n",
    "# Shapiro-Wilk test: the null hypothesis is that the bootstrap means are normally distributed.\n",
    "stat, p = scipy.stats.shapiro(sport_games_critics_bootstrap_means)\n",
    "alpha = 0.05\n",
    "\n",
    "if p < alpha:\n",
    "    print('Распределение значений отзывов критивов спортивных игр не является нормальным')\n",
    "else:\n",
    "    print('Распределение значений отзывов критивов спортивных игр нормальное')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "После приведения значений к нормальному распределению используем одновыборочный тест Стьюдента для проверки наших гипотез.\n",
    "\n",
    "Предположим, что если критикам нравятся спортивные игры, то средняя оценка должна быть не менее 8.\n",
    "\n",
    "H0 - Рейтинг спортивных игр у критиков не менее 8  \n",
    "H1 - Рейтинг спортивных игр у критиков менее 8\n",
    "\n",
    "Используем односторонний тест, так как интересуют значения только с одной стороны."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Критикам не нравятся спортивные игры, ставят оценки меньше 8\n"
     ]
    }
   ],
   "source": [
    "alpha = 0.05\n",
    "\n",
    "# ttest_1samp returns a two-sided p-value; halving it and checking the sign of t\n",
    "# gives the one-sided test for H1: mean critic score < 8.\n",
    "t, p = scipy.stats.ttest_1samp(sport_games_critics_bootstrap_means, 8)\n",
    "\n",
    "# H0 (mean >= 8) is rejected only when t is negative AND the one-sided p-value is\n",
    "# below alpha; in every other case H0 is retained.\n",
    "if p/2 < alpha and t < 0:\n",
    "    print('Критикам не нравятся спортивные игры, ставят оценки меньше 8')\n",
    "else:\n",
    "    print('Критикам нравятся спортивные игры, ставят оценки больше 8')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2) Критикам нравятся больше игры на PC или на PS4?\n",
    "\n",
    "Так как сравниваем 2 выборки, не имеющие нормального распределения, самым адекватным критерием для оценки гипотез будет U-критерий Манна-Уитни для независимых выборок.\n",
    "\n",
    "H0 - Оценки критиков для PC и PS4 не отличаются  \n",
    "H1 - Оценки критиков для PC и PS4 различны\n",
    "\n",
    "Будем выполнять двусторонний тест, так как интересуют отличия с двух сторон плотности распределения значений."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Platform</th>\n",
       "      <th>Critic_Score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>PC</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>PS4</td>\n",
       "      <td>9.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>PC</td>\n",
       "      <td>9.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>PS4</td>\n",
       "      <td>9.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>PS4</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Platform  Critic_Score\n",
       "9        PC          10.0\n",
       "20      PS4           9.7\n",
       "36       PC           9.3\n",
       "45      PS4           9.8\n",
       "50      PS4           8.0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep platform and critic score for PC / PS4 games only, dropping rows with a missing value.\n",
    "df_critics_pc_ps4 = (\n",
    "    df_videogames\n",
    "    .loc[df_videogames['Platform'].isin(['PC', 'PS4']), ['Platform', 'Critic_Score']]\n",
    "    .dropna()\n",
    ")\n",
    "df_critics_pc_ps4.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Оценки критиков для PC и PS4 различны\n",
      "Критикам больше нравятся игры для PS4\n"
     ]
    }
   ],
   "source": [
    "# Critic scores split by platform; reused for the test and for the sample means.\n",
    "pc_critic_scores = df_critics_pc_ps4.loc[df_critics_pc_ps4['Platform'] == 'PC', 'Critic_Score']\n",
    "ps4_critic_scores = df_critics_pc_ps4.loc[df_critics_pc_ps4['Platform'] == 'PS4', 'Critic_Score']\n",
    "\n",
    "alpha = 0.05\n",
    "\n",
    "# Two-sided Mann-Whitney U test on the two independent samples (PC first, PS4 second).\n",
    "stat, p = scipy.stats.mannwhitneyu(pc_critic_scores, ps4_critic_scores, alternative='two-sided')\n",
    "\n",
    "ps4_critic_mean = ps4_critic_scores.mean()\n",
    "pc_critic_mean = pc_critic_scores.mean()\n",
    "\n",
    "if p < alpha:\n",
    "    print('Оценки критиков для PC и PS4 различны')\n",
    "    # The sample means are only used to name the direction of the difference.\n",
    "    preferred_platform_message = ('Критикам больше нравятся игры для PS4'\n",
    "                                  if ps4_critic_mean > pc_critic_mean\n",
    "                                  else 'Критикам больше нравятся игры для PC')\n",
    "    print(preferred_platform_message)\n",
    "else:\n",
    "    print('Оценки критиков для PC и PS4 не отличаются')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3) Критикам больше нравятся стрелялки или стратегии?\n",
    "\n",
    "Аналогично имеем 2 выборки, не имеющие нормального распределения, поэтому самым адекватным критерием для оценки гипотез будет U-критерий Манна-Уитни для независимых выборок.\n",
    "\n",
    "H0 - Оценки критиков между стратегиями и стрелялками не отличаются  \n",
    "H1 - Оценки критиков между стратегиями и стрелялками различны"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Genre</th>\n",
       "      <th>Critic_Score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>Shooter</td>\n",
       "      <td>9.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>Shooter</td>\n",
       "      <td>8.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>Shooter</td>\n",
       "      <td>8.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>Shooter</td>\n",
       "      <td>9.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>Shooter</td>\n",
       "      <td>9.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Genre  Critic_Score\n",
       "36  Shooter           9.3\n",
       "40  Shooter           8.7\n",
       "41  Shooter           8.8\n",
       "43  Shooter           9.6\n",
       "49  Shooter           9.5"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep genre and critic score for Shooter / Strategy games only, dropping rows with a missing value.\n",
    "df_critics_shooter_strategy = (\n",
    "    df_videogames\n",
    "    .loc[df_videogames['Genre'].isin(['Shooter', 'Strategy']), ['Genre', 'Critic_Score']]\n",
    "    .dropna()\n",
    ")\n",
    "df_critics_shooter_strategy.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Оценки критиков между стратегиями и стрелялками не отличаются\n"
     ]
    }
   ],
   "source": [
    "# Critic scores split by genre; reused for the test and for the sample means.\n",
    "shooter_critic_scores = (df_critics_shooter_strategy[df_critics_shooter_strategy.Genre == 'Shooter']\n",
    "                         .Critic_Score)\n",
    "strategy_critic_scores = (df_critics_shooter_strategy[df_critics_shooter_strategy.Genre == 'Strategy']\n",
    "                          .Critic_Score)\n",
    "\n",
    "# Two-sided Mann-Whitney U test on the two independent samples (Shooter first, Strategy second).\n",
    "stat, p = scipy.stats.mannwhitneyu(shooter_critic_scores, strategy_critic_scores,\n",
    "                                   alternative='two-sided')\n",
    "\n",
    "shooter_critic_mean = shooter_critic_scores.mean()\n",
    "strategy_critic_mean = strategy_critic_scores.mean()\n",
    "\n",
    "alpha = 0.05\n",
    "\n",
    "if p < alpha:\n",
    "    # The message names the genres compared in this cell (it was a copy of the PC/PS4 text).\n",
    "    print('Оценки критиков между стратегиями и стрелялками различны')\n",
    "    # A higher shooter mean means shooters are preferred (the two branches were swapped).\n",
    "    if shooter_critic_mean > strategy_critic_mean:\n",
    "        print('Критикам больше нравятся стрелялки')\n",
    "    else:\n",
    "        print('Критикам больше нравятся стратегии')\n",
    "else:\n",
    "    print('Оценки критиков между стратегиями и стрелялками не отличаются')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Задание 2\n",
    "\n",
    "Реализуйте базовую модель логистической регрессии для классификации текстовых сообщений (используемые данные [здесь](https://github.com/obulygin/pyda_homeworks/blob/master/stat_case_study/spam.csv)) по признаку спама. Для этого:\n",
    "\n",
    "1) Приведите весь текст к нижнему регистру;  \n",
    "2) Удалите мусорные символы;  \n",
    "3) Удалите стоп-слова;  \n",
    "4) Приведите все слова к нормальной форме;  \n",
    "5) Преобразуйте все сообщения в вектора TF-IDF. Вам поможет следующий код:  \n",
    "\n",
    "```\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
    "tfidf = TfidfVectorizer()\n",
    "tfidf_matrix = tfidf.fit_transform(df.Message)\n",
    "names = tfidf.get_feature_names()\n",
    "tfidf_matrix = pd.DataFrame(tfidf_matrix.toarray(), columns=names)\n",
    "```\n",
    "\n",
    "Можете поэкспериментировать с параметрами [TfidfVectorizer](https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html);  \n",
    "6) Разделите данные на тестовые и тренировочные в соотношении 30/70, укажите `random_state=42`. Используйте [train_test_split](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html);  \n",
    "7) Постройте модель [логистической регрессии](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html), укажите `random_state=42`, оцените ее точность на тестовых данных;  \n",
    "8) Опишите результаты при помощи [confusion_matrix](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html?highlight=confusion_matrix#sklearn.metrics.confusion_matrix);  \n",
    "9) Постройте датафрейм, который будет содержать все исходные тексты сообщений, классифицированные неправильно (с указанием фактического и предсказанного)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Category</th>\n",
       "      <th>Message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ham</td>\n",
       "      <td>Go until jurong point, crazy.. Available only ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ham</td>\n",
       "      <td>Ok lar... Joking wif u oni...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>spam</td>\n",
       "      <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ham</td>\n",
       "      <td>U dun say so early hor... U c already then say...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ham</td>\n",
       "      <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Category                                            Message\n",
       "0      ham  Go until jurong point, crazy.. Available only ...\n",
       "1      ham                      Ok lar... Joking wif u oni...\n",
       "2     spam  Free entry in 2 a wkly comp to win FA Cup fina...\n",
       "3      ham  U dun say so early hor... U c already then say...\n",
       "4      ham  Nah I don't think he goes to usf, he lives aro..."
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the raw messages from spam.csv and preview the first rows.\n",
    "df_spam_orig = pd.read_csv('spam.csv')\n",
    "df_spam_orig.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Category</th>\n",
       "      <th>Message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>Go until jurong point, crazy.. Available only ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>Ok lar... Joking wif u oni...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>U dun say so early hor... U c already then say...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Category                                            Message\n",
       "0         0  Go until jurong point, crazy.. Available only ...\n",
       "1         0                      Ok lar... Joking wif u oni...\n",
       "2         1  Free entry in 2 a wkly comp to win FA Cup fina...\n",
       "3         0  U dun say so early hor... U c already then say...\n",
       "4         0  Nah I don't think he goes to usf, he lives aro..."
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "'Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Work on an independent copy so the raw frame stays untouched.\n",
    "df_spam = df_spam_orig.copy()\n",
    "# Encode the label as an integer flag: 1 for 'spam', 0 for everything else.\n",
    "df_spam['Category'] = df_spam['Category'].eq('spam').astype('int64')\n",
    "display(df_spam.head())\n",
    "df_spam['Message'].iloc[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Приводим весь текст к нижнему регистру"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Category</th>\n",
       "      <th>Message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>go until jurong point, crazy.. available only ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>ok lar... joking wif u oni...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>free entry in 2 a wkly comp to win fa cup fina...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>u dun say so early hor... u c already then say...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>nah i don't think he goes to usf, he lives aro...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Category                                            Message\n",
       "0         0  go until jurong point, crazy.. available only ...\n",
       "1         0                      ok lar... joking wif u oni...\n",
       "2         1  free entry in 2 a wkly comp to win fa cup fina...\n",
       "3         0  u dun say so early hor... u c already then say...\n",
       "4         0  nah i don't think he goes to usf, he lives aro..."
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "'go until jurong point, crazy.. available only in bugis n great world la e buffet... cine there got amore wat...'"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Lower-case every message with the vectorized string accessor; unlike the per-row\n",
    "# lambda it leaves missing values as NaN instead of raising.\n",
    "df_spam.Message = df_spam.Message.str.lower()\n",
    "display(df_spam[:5])\n",
    "df_spam.iloc[0].Message"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Удалим мусорные символы и стоп-слова"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Category</th>\n",
       "      <th>Message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>go jurong point crazy available bugis n great ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>ok lar joking wif u oni</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>free entry 2 wkly comp win fa cup final tkts 2...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>u dun say early hor u c already say</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>nah think goes usf lives around though</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Category                                            Message\n",
       "0         0  go jurong point crazy available bugis n great ...\n",
       "1         0                            ok lar joking wif u oni\n",
       "2         1  free entry 2 wkly comp win fa cup final tkts 2...\n",
       "3         0                u dun say early hor u c already say\n",
       "4         0             nah think goes usf lives around though"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "'go jurong point crazy available bugis n great world la e buffet cine got amore wat'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Replace every non-alphanumeric character (underscore included) with a space.\n",
    "non_word_chars = re.compile(r'[\\W_]')\n",
    "df_spam.Message = df_spam.Message.apply(lambda text: non_word_chars.sub(' ', text))\n",
    "\n",
    "# Remove English stop words; a set keeps each membership check cheap.\n",
    "stopwords_set = set(stopwords.words('english'))\n",
    "\n",
    "\n",
    "def drop_stopwords(text):\n",
    "    # Keep only the whitespace-separated tokens that are not stop words.\n",
    "    kept_tokens = [token for token in text.split() if token not in stopwords_set]\n",
    "    return ' '.join(kept_tokens)\n",
    "\n",
    "\n",
    "df_spam.Message = df_spam.Message.apply(drop_stopwords)\n",
    "display(df_spam.head(5))\n",
    "df_spam.iloc[0].Message"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Приведем все слова к нормальной форме"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Category</th>\n",
       "      <th>Message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>go jurong point crazy available bugis n great ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>ok lar joking wif u oni</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>free entry 2 wkly comp win fa cup final tkts 2...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>u dun say early hor u c already say</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>nah think go usf life around though</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Category                                            Message\n",
       "0         0  go jurong point crazy available bugis n great ...\n",
       "1         0                            ok lar joking wif u oni\n",
       "2         1  free entry 2 wkly comp win fa cup final tkts 2...\n",
       "3         0                u dun say early hor u c already say\n",
       "4         0                nah think go usf life around though"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "'go jurong point crazy available bugis n great world la e buffet cine got amore wat'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wordnet_lemmatizer = WordNetLemmatizer()\n",
    "\n",
    "\n",
    "def lemmatize_message(text):\n",
    "    # Lemmatize each whitespace-separated token (no POS tag is passed) and re-join with spaces.\n",
    "    return ' '.join(wordnet_lemmatizer.lemmatize(token) for token in text.split())\n",
    "\n",
    "\n",
    "df_spam.Message = df_spam.Message.apply(lemmatize_message)\n",
    "display(df_spam.head(5))\n",
    "df_spam.iloc[0].Message"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "5) Преобразуйте все сообщения в вектора TF-IDF. Вам поможет следующий код:  \n",
    "\n",
    "```\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
    "tfidf = TfidfVectorizer()\n",
    "tfidf_matrix = tfidf.fit_transform(df.Message)\n",
    "names = tfidf.get_feature_names()\n",
    "tfidf_matrix = pd.DataFrame(tfidf_matrix.toarray(), columns=names)\n",
    "```\n",
    "\n",
    "Можете поэкспериментировать с параметрами [TfidfVectorizer](https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html);  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>00</th>\n",
       "      <th>000</th>\n",
       "      <th>000pes</th>\n",
       "      <th>008704050406</th>\n",
       "      <th>0089</th>\n",
       "      <th>0121</th>\n",
       "      <th>01223585236</th>\n",
       "      <th>01223585334</th>\n",
       "      <th>0125698789</th>\n",
       "      <th>02</th>\n",
       "      <th>...</th>\n",
       "      <th>zhong</th>\n",
       "      <th>zindgi</th>\n",
       "      <th>zoe</th>\n",
       "      <th>zogtorius</th>\n",
       "      <th>zoom</th>\n",
       "      <th>zouk</th>\n",
       "      <th>zyada</th>\n",
       "      <th>èn</th>\n",
       "      <th>ú1</th>\n",
       "      <th>〨ud</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5567</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5568</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5569</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5570</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5571</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5572 rows × 8048 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       00  000  000pes  008704050406  0089  0121  01223585236  01223585334  \\\n",
       "0     0.0  0.0     0.0           0.0   0.0   0.0          0.0          0.0   \n",
       "1     0.0  0.0     0.0           0.0   0.0   0.0          0.0          0.0   \n",
       "2     0.0  0.0     0.0           0.0   0.0   0.0          0.0          0.0   \n",
       "3     0.0  0.0     0.0           0.0   0.0   0.0          0.0          0.0   \n",
       "4     0.0  0.0     0.0           0.0   0.0   0.0          0.0          0.0   \n",
       "...   ...  ...     ...           ...   ...   ...          ...          ...   \n",
       "5567  0.0  0.0     0.0           0.0   0.0   0.0          0.0          0.0   \n",
       "5568  0.0  0.0     0.0           0.0   0.0   0.0          0.0          0.0   \n",
       "5569  0.0  0.0     0.0           0.0   0.0   0.0          0.0          0.0   \n",
       "5570  0.0  0.0     0.0           0.0   0.0   0.0          0.0          0.0   \n",
       "5571  0.0  0.0     0.0           0.0   0.0   0.0          0.0          0.0   \n",
       "\n",
       "      0125698789   02  ...  zhong  zindgi  zoe  zogtorius  zoom  zouk  zyada  \\\n",
       "0            0.0  0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0    0.0   \n",
       "1            0.0  0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0    0.0   \n",
       "2            0.0  0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0    0.0   \n",
       "3            0.0  0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0    0.0   \n",
       "4            0.0  0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0    0.0   \n",
       "...          ...  ...  ...    ...     ...  ...        ...   ...   ...    ...   \n",
       "5567         0.0  0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0    0.0   \n",
       "5568         0.0  0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0    0.0   \n",
       "5569         0.0  0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0    0.0   \n",
       "5570         0.0  0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0    0.0   \n",
       "5571         0.0  0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0    0.0   \n",
       "\n",
       "       èn   ú1  〨ud  \n",
       "0     0.0  0.0  0.0  \n",
       "1     0.0  0.0  0.0  \n",
       "2     0.0  0.0  0.0  \n",
       "3     0.0  0.0  0.0  \n",
       "4     0.0  0.0  0.0  \n",
       "...   ...  ...  ...  \n",
       "5567  0.0  0.0  0.0  \n",
       "5568  0.0  0.0  0.0  \n",
       "5569  0.0  0.0  0.0  \n",
       "5570  0.0  0.0  0.0  \n",
       "5571  0.0  0.0  0.0  \n",
       "\n",
       "[5572 rows x 8048 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tfidf = TfidfVectorizer()\n",
    "# Keep the sparse result under its own name so re-running this cell never feeds a\n",
    "# DataFrame back into .toarray().\n",
    "tfidf_sparse = tfidf.fit_transform(df_spam.Message)\n",
    "\n",
    "# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;\n",
    "# use get_feature_names_out() when available and fall back only on older releases.\n",
    "if hasattr(tfidf, 'get_feature_names_out'):\n",
    "    names = tfidf.get_feature_names_out()\n",
    "else:\n",
    "    names = tfidf.get_feature_names()\n",
    "\n",
    "# Reuse df_spam's index so the later index-based merge with the labels stays aligned\n",
    "# row for row.\n",
    "tfidf_matrix = pd.DataFrame(tfidf_sparse.toarray(), columns=names, index=df_spam.index)\n",
    "tfidf_matrix"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "6) Разделите данные на тестовые и тренировочные в соотношении 30/70, укажите random_state=42. Используйте train_test_split;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Category</th>\n",
       "      <th>00</th>\n",
       "      <th>000</th>\n",
       "      <th>000pes</th>\n",
       "      <th>008704050406</th>\n",
       "      <th>0089</th>\n",
       "      <th>0121</th>\n",
       "      <th>01223585236</th>\n",
       "      <th>01223585334</th>\n",
       "      <th>0125698789</th>\n",
       "      <th>...</th>\n",
       "      <th>zhong</th>\n",
       "      <th>zindgi</th>\n",
       "      <th>zoe</th>\n",
       "      <th>zogtorius</th>\n",
       "      <th>zoom</th>\n",
       "      <th>zouk</th>\n",
       "      <th>zyada</th>\n",
       "      <th>èn</th>\n",
       "      <th>ú1</th>\n",
       "      <th>〨ud</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 8049 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Category   00  000  000pes  008704050406  0089  0121  01223585236  \\\n",
       "0         0  0.0  0.0     0.0           0.0   0.0   0.0          0.0   \n",
       "1         0  0.0  0.0     0.0           0.0   0.0   0.0          0.0   \n",
       "2         1  0.0  0.0     0.0           0.0   0.0   0.0          0.0   \n",
       "3         0  0.0  0.0     0.0           0.0   0.0   0.0          0.0   \n",
       "4         0  0.0  0.0     0.0           0.0   0.0   0.0          0.0   \n",
       "\n",
       "   01223585334  0125698789  ...  zhong  zindgi  zoe  zogtorius  zoom  zouk  \\\n",
       "0          0.0         0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0   \n",
       "1          0.0         0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0   \n",
       "2          0.0         0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0   \n",
       "3          0.0         0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0   \n",
       "4          0.0         0.0  ...    0.0     0.0  0.0        0.0   0.0   0.0   \n",
       "\n",
       "   zyada   èn   ú1  〨ud  \n",
       "0    0.0  0.0  0.0  0.0  \n",
       "1    0.0  0.0  0.0  0.0  \n",
       "2    0.0  0.0  0.0  0.0  \n",
       "3    0.0  0.0  0.0  0.0  \n",
       "4    0.0  0.0  0.0  0.0  \n",
       "\n",
       "[5 rows x 8049 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Place the label column in front of the TF-IDF features, pairing rows by index.\n",
    "df_tf_idf = df_spam[['Category']].merge(tfidf_matrix, left_index=True, right_index=True)\n",
    "df_tf_idf.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column 0 of df_tf_idf is the Category label; all remaining columns are features.\n",
    "tfidf_features = df_tf_idf.iloc[:, 1:]\n",
    "spam_labels = df_tf_idf['Category']\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    tfidf_features, spam_labels, test_size=0.3, random_state=42\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "7) Постройте модель логистической регрессии, укажите random_state=42, оцените ее точность на тестовых данных;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\program files\\python36\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Точность предсказания: 0.958732057416268\n"
     ]
    }
   ],
   "source": [
    "# Pin the solver explicitly: the library default changed from 'liblinear' to 'lbfgs'\n",
    "# in scikit-learn 0.22, so an implicit solver emits a FutureWarning on older releases\n",
    "# and makes the fitted model depend on the installed version.\n",
    "log_reg = LogisticRegression(solver='liblinear', random_state=42)\n",
    "log_reg.fit(X_train, y_train)\n",
    "y_predict = log_reg.predict(X_test)\n",
    "\n",
    "print('Точность предсказания:', accuracy_score(y_test, y_predict))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "8) Опишите результаты при помощи [confusion_matrix](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html?highlight=confusion_matrix#sklearn.metrics.confusion_matrix);  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1445,    3],\n",
       "       [  66,  158]], dtype=int64)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows correspond to the actual classes, columns to the predicted classes.\n",
    "confusion_matrix(y_true=y_test, y_pred=y_predict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1445 - количество истинно-отрицательных результатов. Количество писем, определенных моделью как нормальные, и они действительно не являлись спамом.\n",
    "\n",
    "3    - кол-во ложноположительных решений. Кол-во писем, принятых за спам, но реально спамом они не являются.\n",
    "\n",
    "66   - кол-во ложноотрицательных решений. Кол-во писем, принятых за нормальные, но реально являющиеся спамом.\n",
    "\n",
    "158  - кол-во истинно-положительных решений. Кол-во писем, которые отмечены как спам и реально таковыми являются."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "9) Постройте датафрейм, который будет содержать все исходные тексты сообщений, классифицированные неправильно (с указанием фактического и предсказанного)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Category</th>\n",
       "      <th>Predict_Category</th>\n",
       "      <th>Message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2952</th>\n",
       "      <td>ham</td>\n",
       "      <td>spam</td>\n",
       "      <td>Hey now am free you can call me.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>881</th>\n",
       "      <td>spam</td>\n",
       "      <td>ham</td>\n",
       "      <td>Reminder: You have not downloaded the content ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1961</th>\n",
       "      <td>spam</td>\n",
       "      <td>ham</td>\n",
       "      <td>Guess what! Somebody you know secretly fancies...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3864</th>\n",
       "      <td>spam</td>\n",
       "      <td>ham</td>\n",
       "      <td>Oh my god! I've found your number again! I'm s...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2575</th>\n",
       "      <td>spam</td>\n",
       "      <td>ham</td>\n",
       "      <td>Your next amazing xxx PICSFREE1 video will be ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3548</th>\n",
       "      <td>spam</td>\n",
       "      <td>ham</td>\n",
       "      <td>Rock yr chik. Get 100's of filthy films &amp;XXX p...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2402</th>\n",
       "      <td>spam</td>\n",
       "      <td>ham</td>\n",
       "      <td>Babe: U want me dont u baby! Im nasty and have...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4527</th>\n",
       "      <td>spam</td>\n",
       "      <td>ham</td>\n",
       "      <td>I want some cock! My hubby's away, I need a re...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>spam</td>\n",
       "      <td>ham</td>\n",
       "      <td>XXXMobileMovieClub: To use your credit, click ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2663</th>\n",
       "      <td>spam</td>\n",
       "      <td>ham</td>\n",
       "      <td>Hello darling how are you today? I would love ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Category Predict_Category  \\\n",
       "2952      ham             spam   \n",
       "881      spam              ham   \n",
       "1961     spam              ham   \n",
       "3864     spam              ham   \n",
       "2575     spam              ham   \n",
       "3548     spam              ham   \n",
       "2402     spam              ham   \n",
       "4527     spam              ham   \n",
       "15       spam              ham   \n",
       "2663     spam              ham   \n",
       "\n",
       "                                                Message  \n",
       "2952                   Hey now am free you can call me.  \n",
       "881   Reminder: You have not downloaded the content ...  \n",
       "1961  Guess what! Somebody you know secretly fancies...  \n",
       "3864  Oh my god! I've found your number again! I'm s...  \n",
       "2575  Your next amazing xxx PICSFREE1 video will be ...  \n",
       "3548  Rock yr chik. Get 100's of filthy films &XXX p...  \n",
       "2402  Babe: U want me dont u baby! Im nasty and have...  \n",
       "4527  I want some cock! My hubby's away, I need a re...  \n",
       "15    XXXMobileMovieClub: To use your credit, click ...  \n",
       "2663  Hello darling how are you today? I would love ...  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def label_name(value):\n",
    "    # 1 is reported as 'spam'; every other value is reported as 'ham'.\n",
    "    if value == 1:\n",
    "        return 'spam'\n",
    "    return 'ham'\n",
    "\n",
    "\n",
    "# Actual vs. predicted class for each test row, keyed by y_test's index.\n",
    "df_real_predict = pd.DataFrame({'Category': y_test, 'Predict_Category': y_predict})\n",
    "# Attach the message text from df_spam_orig, matching rows by index.\n",
    "df_real_predict = df_real_predict.merge(df_spam_orig[['Message']], how='left',\n",
    "                                        left_index=True, right_index=True)\n",
    "for label_column in ('Category', 'Predict_Category'):\n",
    "    df_real_predict[label_column] = df_real_predict[label_column].apply(label_name)\n",
    "\n",
    "# Keep only the rows where the prediction disagrees with the actual class.\n",
    "is_misclassified = df_real_predict['Category'] != df_real_predict['Predict_Category']\n",
    "df_uncorrect_predicts = df_real_predict[is_misclassified]\n",
    "df_uncorrect_predicts.head(10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
